From b24b47f4e6de81bd6275d4187a51ca96a973a91f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 23 Jun 2023 08:21:05 -0400 Subject: [PATCH 1/5] Add github action to codespell master on push and PRs --- .github/workflows/codespell.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/codespell.yml diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 0000000..7373aff --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,22 @@ +--- +name: Codespell + +on: + push: + branches: [master] + pull_request: + branches: [master] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Codespell + uses: codespell-project/actions-codespell@v2 From 2b47cb7ee0da6c86c6856cd3b2ae1f53d2b698c1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 23 Jun 2023 08:21:05 -0400 Subject: [PATCH 2/5] Add rudimentary codespell config --- .codespellrc | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .codespellrc diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000..da3c057 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,4 @@ +[codespell] +skip = .git,*.pdf,*.svg +# +# ignore-words-list = From eb7e07a3a27cb28ffb33a28c552cb97eb19bfcaa Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 23 Jun 2023 08:21:35 -0400 Subject: [PATCH 3/5] ignores --- .codespellrc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.codespellrc b/.codespellrc index da3c057..7877476 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,4 +1,5 @@ [codespell] skip = .git,*.pdf,*.svg -# -# ignore-words-list = +# clen - variable +# missings - there is an .md +ignore-words-list = clen,missings From 782ba757ed79da2b16f130dbf418dda63e0a4b46 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 23 Jun 2023 08:22:07 
-0400 Subject: [PATCH 4/5] Fix ambiguous typos --- docs/src/storage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/storage.md b/docs/src/storage.md index f420c0f..9b2565d 100644 --- a/docs/src/storage.md +++ b/docs/src/storage.md @@ -1,6 +1,6 @@ # Developing new storage backends -One advantage of the zarr data model is that it can be used in combiantion with a variety of storage backends. Currently in this package there is support for a `DictStore` (keeping data in memory), `DirectoryStore` (writing data to a local disk) and an `S3Store` for S3-compatible object store which is currently read-only. In oder to implement a new storage backend, you would have to create a subtype of `Zarr.AbstractStore` and implement the following methods: +One advantage of the zarr data model is that it can be used in combination with a variety of storage backends. Currently in this package there is support for a `DictStore` (keeping data in memory), `DirectoryStore` (writing data to a local disk) and an `S3Store` for S3-compatible object store which is currently read-only. In order to implement a new storage backend, you would have to create a subtype of `Zarr.AbstractStore` and implement the following methods: ```@meta CurrentModule = Zarr ``` From c94190473bb85ea39879d96bc9301b7bd7084a6a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 23 Jun 2023 08:22:55 -0400 Subject: [PATCH 5/5] [DATALAD RUNCMD] run codespell throughout === Do not change lines below === { "chain": [], "cmd": "codespell -w", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." 
} ^^^ Do not change lines above ^^^ --- docs/src/missings.md | 2 +- docs/src/operations.md | 2 +- src/Storage/gcstore.jl | 2 +- src/ZArray.jl | 2 +- src/metadata.jl | 2 +- test/python.jl | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/src/missings.md b/docs/src/missings.md index 4a21926..492231c 100644 --- a/docs/src/missings.md +++ b/docs/src/missings.md @@ -58,7 +58,7 @@ julia> readdir(p) ## Dealing with Julia's Missing type in Zarr.jl -Like most data storage formats, also Zarr supports storing most of the standard C-compatible data types like integers, unsigned integers and floating point types of different sizes. This Means that it is no problem to directly map a `Vector{Int64}` to a Zarr array. However, the story gets complicated for arrays containing missings with a Union element type like `Union{Int64,Missing}`, since they can not be passed to compression lbraries as simple C pointers and are not very inter-operable with other lanugages. +Like most data storage formats, also Zarr supports storing most of the standard C-compatible data types like integers, unsigned integers and floating point types of different sizes. This Means that it is no problem to directly map a `Vector{Int64}` to a Zarr array. However, the story gets complicated for arrays containing missings with a Union element type like `Union{Int64,Missing}`, since they can not be passed to compression lbraries as simple C pointers and are not very inter-operable with other languages. One solution to this problem is to use Zarrs `fillvalue`s to represent missing values. Here we open the previously created array and use the `fill_as_missing` option. 
In this case accessing an uninitialized array member will return missing: diff --git a/docs/src/operations.md b/docs/src/operations.md index 7ecc88d..21902c7 100644 --- a/docs/src/operations.md +++ b/docs/src/operations.md @@ -2,7 +2,7 @@ A Zarr Array consists of a collection of potentially compressed chunks, and there is a significant overhead in accessing a single item from such an array compared to Julia's Base Array type. -In order to make operations on `ZArray`s still efficient, we use the [DiskArrays](https://github.com/meggart/DiskArrays.jl/) package which enbales efficient broadcast and reductions on `Zarray`s respecting their chunk sizes. This includes some modified behavior compared to a normal `AbstractArray`, including lazy broadcasting and a non-default array access order for reductions. +In order to make operations on `ZArray`s still efficient, we use the [DiskArrays](https://github.com/meggart/DiskArrays.jl/) package which enables efficient broadcast and reductions on `Zarray`s respecting their chunk sizes. This includes some modified behavior compared to a normal `AbstractArray`, including lazy broadcasting and a non-default array access order for reductions. Please refer to the DiskArrays documentation to see which operations are supported. diff --git a/src/Storage/gcstore.jl b/src/Storage/gcstore.jl index afad114..5f85820 100644 --- a/src/Storage/gcstore.jl +++ b/src/Storage/gcstore.jl @@ -21,7 +21,7 @@ end Zarr.gcs_credentials(; metadata_url = "http://metadata.google.internal/computeMetadata/v1/") Set (or renew) the user project, access token and and token type for the Google -Cloud Store from the Meatadata server (assuming the function is executed from +Cloud Store from the Metadata server (assuming the function is executed from the Google Cloud). For some data sets, the error message "Bucket is requester pays bucket but no user project provided" is returned if the credentials are not provided. 
diff --git a/src/ZArray.jl b/src/ZArray.jl index 6cc6c68..9d9d915 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -293,7 +293,7 @@ end """ zcreate(T, dims...;kwargs) -Creates a new empty zarr aray with element type `T` and array dimensions `dims`. The following keyword arguments are accepted: +Creates a new empty zarr array with element type `T` and array dimensions `dims`. The following keyword arguments are accepted: * `path=""` directory name to store a persistent array. If left empty, an in-memory array will be created * `name=""` name of the zarr array, defaults to the directory name diff --git a/src/metadata.jl b/src/metadata.jl index 8ea78a3..c7660e7 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -2,7 +2,7 @@ import Dates: Date, DateTime """NumPy array protocol type string (typestr) format -A string providing the basic type of the homogenous array. The basic string format +A string providing the basic type of the homogeneous array. The basic string format consists of 3 parts: a character describing the byteorder of the data (<: little-endian, >: big-endian, |: not-relevant), a character code giving the basic type of the array, and an integer providing the number of bytes the type uses. diff --git a/test/python.jl b/test/python.jl index e2568b0..8124e6e 100644 --- a/test/python.jl +++ b/test/python.jl @@ -1,7 +1,7 @@ ### ### This test is to check against the reference zarr implementation in Python ### We save some data in Julia and python and test if it is still the same -### when read from teh other language +### when read from the other language ### @testset "Python zarr implementation" begin