Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Artifact rework #47

Merged
merged 7 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
/Manifest.toml
/docs/Manifest.toml
/docs/build/
/Artifacts.toml
47 changes: 0 additions & 47 deletions Artifacts.toml

This file was deleted.

10 changes: 5 additions & 5 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
name = "TimeZoneFinder"
uuid = "3ccf6684-3f25-4581-8c58-114637dcab4a"
authors = ["Tom Gillam <[email protected]>"]
version = "0.5.2"
version = "0.6.0"

[deps]
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
Memoize = "c03570c3-d221-55d1-a50c-7939bbd78826"
Meshes = "eacbb407-ea5a-433e-ab97-5258b1ca43fa"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"

[compat]
Downloads = "1"
JSON3 = "1"
LazyArtifacts = "1"
Memoize = "0.4"
Meshes = "0.32,0.33,0.34,0.35,0.36, 0.37, 0.38, 0.39"
PrecompileTools = "1"
Scratch = "1"
TimeZones = "1.10"
ZipArchives = "1"
julia = "1.6"

[extras]
Expand Down
10 changes: 0 additions & 10 deletions artifact_build/Manifest.toml

This file was deleted.

2 changes: 0 additions & 2 deletions artifact_build/Project.toml

This file was deleted.

5 changes: 0 additions & 5 deletions artifact_build/README.md

This file was deleted.

49 changes: 0 additions & 49 deletions artifact_build/create.jl

This file was deleted.

98 changes: 85 additions & 13 deletions src/TimeZoneFinder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ module TimeZoneFinder

export timezone_at, timezones_at

using Downloads: download
using JSON3
using LazyArtifacts
using Memoize
using Meshes
using Pkg.Artifacts
using Pkg.TOML
using PrecompileTools
using Scratch
using Serialization
using TimeZones
using ZipArchives: ZipBufferReader, zip_names, zip_openentry

"""Get points that form a closed loop.

Expand Down Expand Up @@ -72,12 +73,60 @@ function Base.in(point::Point, bpa::BoundedPolyArea)
return in(point, bpa.polyarea)
end

"""
_get_artifact_path(version) -> String

Get the path to the artifact for `version`, e.g. "2023b".

This will download the necessary data if it doesn't already exist.
"""
function _get_artifact_path(version::AbstractString)
    toml_path = joinpath(dirname(@__DIR__), "Artifacts.toml")
    name = "timezone-boundary-builder-$version"

    # Fast path: the artifact is already bound in the TOML file and its
    # directory is still present on disk.
    known_hash = artifact_hash(name, toml_path)
    if known_hash !== nothing && artifact_exists(known_hash)
        return artifact_path(known_hash)
    end

    # Slow path: fetch the release archive. It is held in memory only, because
    # nothing but the extracted file is kept.
    url = (
        "https://github.com/evansiroky/timezone-boundary-builder/releases/download/" *
        "$version/timezones-with-oceans.geojson.zip"
    )
    new_hash = create_artifact() do artifact_dir
        archive = ZipBufferReader(take!(download(url, IOBuffer())))
        # The archive is expected to hold exactly one entry; `only` throws otherwise.
        entry = only(zip_names(archive))
        # Some releases nest the file one directory deep (2018d ships
        # `dist/combined-with-oceans.json`, newer ones `combined-with-oceans.json`),
        # so only the final path component is used for the output name.
        destination = joinpath(artifact_dir, basename(entry))
        zip_openentry(archive, entry) do io
            open(destination, "w") do f
                write(f, read(io))
            end
        end
    end

    # `force=true` replaces any stale binding, e.g. one left behind after the
    # artifacts directory was emptied.
    bind_artifact!(toml_path, name, new_hash; force=true)
    return artifact_path(new_hash)
end

"""
Generate the timezone map data from the artifact identified by `version`.
"""
function generate_data(version::AbstractString)
artifact_name = "timezone-boundary-builder-$version"
dir = LazyArtifacts.@artifact_str(artifact_name)
dir = _get_artifact_path(version)
obj = open(JSON3.read, joinpath(dir, "combined-with-oceans.json"))

# Vectors that will be populated in the loop below.
Expand Down Expand Up @@ -147,16 +196,42 @@ Julia process.
end
end

"""
    _read_gh_api_paginated(url, per_page, page)

Fetch a single page of a paginated GitHub API listing and return the parsed JSON.

The request is `url` with `per_page` and `page` appended as query parameters. The
response body is downloaded into memory and parsed with `JSON3.read`.

`per_page` and `page` accept any `Integer` rather than only `Int64`, so that callers
passing plain integer literals also dispatch correctly on platforms where `Int` is
`Int32`.
"""
function _read_gh_api_paginated(url::AbstractString, per_page::Integer, page::Integer)
    query_url = "$(url)?per_page=$(per_page)&page=$(page)"
    return JSON3.read(take!(download(query_url, IOBuffer())))
end

"""
    _read_gh_api_paginated(url)

Fetch every page of a paginated GitHub API listing and return all entries as one
concatenated collection.

Pages are requested in order, starting from page 1, until a page comes back empty.
"""
function _read_gh_api_paginated(url::AbstractString)
    # TODO: This is the maximum per-page limit, at least for the "releases" command
    per_page = 100
    pages = []
    page_number = 1
    while true
        current = _read_gh_api_paginated(url, per_page, page_number)
        push!(pages, current)
        # An empty page marks the end of the listing. It is still kept in
        # `pages`, which is harmless for the concatenation below.
        isempty(current) && break
        page_number += 1
    end
    return reduce(vcat, pages)
end

"""
_get_boundary_builder_versions()

Get a list of versions for we have boundary data. Will be e.g. `["2022a", "2023b"]`.
Get a list of versions for which we have boundary data.

The list will be sorted in order of increasing versions.
Will be e.g. `["2022a", "2023b"]`. The list will be sorted in order of increasing versions.
"""
function _get_boundary_builder_versions()
toml = TOML.parsefile(find_artifacts_toml(@__FILE__))
return sort!([last(split(name, "-")) for name in keys(toml)])
@memoize function _get_boundary_builder_versions()
    # TODO: Releases older than 2018d exist (back to 2016d), but they ship a
    # differently named zip file. Support could be added if there is demand.

    # NOTE: The GitHub API is queried by hand to avoid a moderately heavy
    # dependency on GitHub.jl.
    releases = _read_gh_api_paginated(
        "https://api.github.com/repos/evansiroky/timezone-boundary-builder/releases"
    )
    tags = [release[:tag_name] for release in releases]
    # Plain string comparison is used to drop everything before "2018d".
    supported = filter(>=("2018d"), tags)
    return sort(supported)
end

"""
Expand All @@ -170,7 +245,7 @@ the `TimeZones` package. The map from tzdata version -> boundary version is memo

This is determined by the rules in the "note" in the docstring for [`timezone_at`](@ref).
"""
@memoize function _timezone_boundary_builder_version(tzdata_version::AbstractString)
function _timezone_boundary_builder_version(tzdata_version::AbstractString)
boundary_builder_versions = _get_boundary_builder_versions()

i = searchsortedlast(boundary_builder_versions, tzdata_version)
Expand Down Expand Up @@ -254,7 +329,4 @@ function timezone_at(latitude::Real, longitude::Real)
return only(tzs)
end

# Precompile the primary API.
@compile_workload timezone_at(1.0, 1.0)

end
19 changes: 17 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
using Memoize
using Test
using TimeZoneFinder
using TimeZoneFinder: _timezone_boundary_builder_version
using TimeZoneFinder:
_get_artifact_path, _get_boundary_builder_versions, _timezone_boundary_builder_version
using TimeZones

"""
Expand Down Expand Up @@ -197,7 +198,7 @@ const TEST_LOCATIONS =

@testset "old tzdata versions" begin
# Run for several tzdata versions that we should be able to support.
for version in ["2021c", "2022d", "2022f"]
for version in ["2018d", "2021c", "2022d", "2022f"]
tzdata_context(version) do
@test timezone_at(52.5061, 13.358) == TimeZone("Europe/Berlin")
end
Expand All @@ -211,4 +212,18 @@ const TEST_LOCATIONS =
@test timezone_at(50.438114, 30.5179595) == TimeZone("Europe/Kyiv")
end
end

@testset "_get_artifact_path" begin
    # The first lookup may have to download the data; afterwards the artifact
    # directory must contain the extracted boundary file.
    first_dir = _get_artifact_path("2023b")
    @test isfile(joinpath(first_dir, "combined-with-oceans.json"))
    # A repeated lookup must resolve to the very same directory.
    second_dir = _get_artifact_path("2023b")
    @test first_dir == second_dir
end

@testset "_get_boundary_builder_versions" begin
    available = _get_boundary_builder_versions()
    # The list must already be in increasing order.
    @test available == sort(available)
    # 2018d is the oldest release the package supports.
    @test first(available) == "2018d"
    @test length(available) >= 10
end
end
Loading