From 3ba559db6e61122ecd15a444211cc9bd2e673ec1 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Tue, 20 Jun 2023 10:14:02 -0600 Subject: [PATCH] Add package extension to support InlineStrings in Arrow.jl (#66) * Add package extension to support InlineStrings in Arrow.jl Fixes https://github.com/apache/arrow-julia/issues/196. This utilizes the new package extension feature of Julia 1.9 to add a conditional dependency on the ArrowTypes.jl package. With ArrowTypes.jl, it adds the necessary overloads to allow round-tripping of inline strings through the arrow format. Other language implementations will read them as normal strings, but in the Julia implementation, the additional type metadata signals that these strings were originally inline strings and can be deserialized as such. I'm explicitly not using the Requires.jl hack for backwards compat w/ older Julia versions because I like the idea of this being sort of a "beta" feature for users already using 1.9 to see if there are any unexpected issues that pop up for inline strings in the arrow format. 
* Only test package extension on 1.9 --- Project.toml | 9 ++++++++- ext/ArrowTypesExt.jl | 15 +++++++++++++++ ext/tests.jl | 8 ++++++++ test/runtests.jl | 5 +++++ 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 ext/ArrowTypesExt.jl create mode 100644 ext/tests.jl diff --git a/Project.toml b/Project.toml index 5c1dafd..ec5de11 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "1.4.0" Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" [extras] +Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -15,5 +16,11 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Parsers = "2" julia = "1.6" +[weakdeps] +ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + +[extensions] +ArrowTypesExt = "ArrowTypes" + [targets] -test = ["Test", "Random", "Serialization"] +test = ["Arrow", "Test", "Random", "Serialization"] diff --git a/ext/ArrowTypesExt.jl b/ext/ArrowTypesExt.jl new file mode 100644 index 0000000..78727ba --- /dev/null +++ b/ext/ArrowTypesExt.jl @@ -0,0 +1,15 @@ +module ArrowTypesExt + +using ArrowTypes, InlineStrings + +for sz in (1, 4, 8, 16, 32, 64, 128, 256) + nm = Symbol(:InlineString, max(1, sz - 1)) + arrow_nm = Symbol("JuliaLang.InlineStrings.", nm) + @eval begin + ArrowTypes.arrowname(::Type{$nm}) = $(Meta.quot(arrow_nm)) + ArrowTypes.JuliaType(::Val{$(Meta.quot(arrow_nm))}) = $nm + ArrowTypes.fromarrow(::Type{$nm}, ptr::Ptr{UInt8}, len::Int) = $nm(ptr, len) + end +end + +end \ No newline at end of file diff --git a/ext/tests.jl b/ext/tests.jl new file mode 100644 index 0000000..d1cec4d --- /dev/null +++ b/ext/tests.jl @@ -0,0 +1,8 @@ +using Test, Arrow, InlineStrings + +@testset "basic Arrow.jl interop" begin + t = (x = inlinestrings(["a", "b", "sailor"]),) + t2 = Arrow.Table(Arrow.tobuffer(t)) + @test isequal(t.x, t2.x) + @test t2.x[1] isa InlineString +end diff --git 
a/test/runtests.jl b/test/runtests.jl index e139ebd..215277a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -562,3 +562,8 @@ end @test repr(String31["foo", "bar"]) == "String31[\"foo\", \"bar\"]" @test repr(InlineString[inline1"a", inline15"a"]) == "InlineString[String1(\"a\"), String15(\"a\")]" end + +# only test package extension on >= 1.9.0 +if VERSION >= v"1.9.0" +include(joinpath(dirname(pathof(InlineStrings)), "../ext/tests.jl")) +end