From 383d0fb1c4388b6d3860beb99a1509eb9e215946 Mon Sep 17 00:00:00 2001 From: Joao Aparicio Date: Mon, 10 Apr 2023 20:24:40 -0500 Subject: [PATCH 1/2] Pre-allocate buffer If we let transcode do its own allocation it will allocate a small vector, start filling it, resize the vector, fill it some more, resize the vector, etc. Instead, in this commit we pre-allocate a vector of the correct size and pass it to transcode(). Inspired by https://github.com/apache/arrow-julia/pull/399 --- Project.toml | 2 ++ src/table.jl | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 1bd32848..7bb782d7 100644 --- a/Project.toml +++ b/Project.toml @@ -33,6 +33,7 @@ PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" +TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" WorkerUtilities = "76eceee3-57b5-4d4a-8e66-0e911cebbf60" @@ -48,6 +49,7 @@ PooledArrays = "0.5, 1.0" SentinelArrays = "1" Tables = "1.1" TimeZones = "1" +TranscodingStreams = "0.10" WorkerUtilities = "1.1" julia = "1.6" diff --git a/src/table.jl b/src/table.jl index b1695e9b..db5184f2 100644 --- a/src/table.jl +++ b/src/table.jl @@ -521,10 +521,11 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression) len = unsafe_load(convert(Ptr{Int64}, ptr)) ptr += 8 # skip past uncompressed length as Int64 encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8) + decodedbytes = Vector{UInt8}(undef, len) if compression.codec === Meta.CompressionTypes.LZ4_FRAME - decodedbytes = transcode(LZ4FrameDecompressor, encodedbytes) + transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes) elseif compression.codec === Meta.CompressionTypes.ZSTD - decodedbytes = transcode(ZstdDecompressor, encodedbytes) + transcode(ZstdDecompressor, encodedbytes, decodedbytes) else 
error("unsupported compression type when reading arrow buffers: $(typeof(compression.codec))") end From 5a69758271953552aa0a61bc29a0cbc7bcc233c9 Mon Sep 17 00:00:00 2001 From: Joao Aparicio Date: Tue, 11 Apr 2023 08:08:43 -0500 Subject: [PATCH 2/2] Bugfix compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7bb782d7..bb4edd0a 100644 --- a/Project.toml +++ b/Project.toml @@ -49,7 +49,7 @@ PooledArrays = "0.5, 1.0" SentinelArrays = "1" Tables = "1.1" TimeZones = "1" -TranscodingStreams = "0.10" +TranscodingStreams = "0.9.12" WorkerUtilities = "1.1" julia = "1.6"