From fe25c4e491c42f8867b6271a0832f86bc38ec733 Mon Sep 17 00:00:00 2001 From: Ethan Meitz <54505069+ejmeitz@users.noreply.github.com> Date: Fri, 21 Jul 2023 16:56:12 -0400 Subject: [PATCH 01/12] start parallel update --- src/JLD2.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/JLD2.jl b/src/JLD2.jl index 032ae02b..f7f5bede 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -297,18 +297,27 @@ FallbackType(::Type{IOStream}) = nothing read_bytestring(io::IOStream) = String(readuntil(io, 0x00)) const OPEN_FILES = Dict{String,WeakRef}() +const OPEN_PARALLEL_FILES = Dict{String,WeakRef}() #these files are read-only const OPEN_FILES_LOCK = ReentrantLock() function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, iotype::T=MmapIO; fallback::Union{Type, Nothing} = FallbackType(iotype), compress=false, mmaparrays::Bool=false, typemap::Dict{String}=Dict{String,Any}(), + parallel_read::Bool=false, ) where T<:Union{Type{IOStream},Type{MmapIO}} mmaparrays && @warn "mmaparrays keyword is currently ignored" maxlog=1 verify_compressor(compress) exists = ispath(fname) - lock(OPEN_FILES_LOCK) + # Can only open multiple times if mode is "r" + if parallel_read && (wr, create, truncate) != (false, false, false) + throw(ArgumentError("Cannot open file in multiple threads unless mode is \"r\"")) + end + + #Do not lock file if user specifies parallel_read + parallel_read && lock(OPEN_FILES_LOCK) + f = try if exists rname = realpath(fname) @@ -316,6 +325,12 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, if !isfile(rname) throw(ArgumentError("not a regular file: $fname")) end + + #Check that file is not open elsewhere in a non-read context + if parallel_read && haskey(OPEN_FILES, rname) + #TODO: + end + if haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] f = ref.value @@ -339,6 +354,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, return f end end + end io = openfile(iotype, fname, wr, create, truncate, fallback) @@ -350,7 +366,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, catch e rethrow(e) finally - unlock(OPEN_FILES_LOCK) + parallel_read && unlock(OPEN_FILES_LOCK) end if f.written f.base_address = 512 From 3c600ee88628e2d0f0ff7e68942ef85b1cb3a79c Mon Sep 17 00:00:00 2001 From: Ethan Meitz <54505069+ejmeitz@users.noreply.github.com> Date: Fri, 21 Jul 2023 17:00:41 -0400 Subject: [PATCH 02/12] Update JLD2.jl --- src/JLD2.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/JLD2.jl b/src/JLD2.jl index f7f5bede..a50bebc9 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -328,8 +328,13 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, #Check that file is not open elsewhere in a non-read context if parallel_read && haskey(OPEN_FILES, rname) - #TODO: + ref = OPEN_FILES[rname] + f = ref.value + if !isnothing(f) + f.writable && throw(ArgumentError("Cannot open file multiple times unless mode is always \"r\". File was open elsewhere in a write mode.")) + end end + #TODO: Use dict of parallel files open if haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] From 363a8873bbfcbdc7cbf63a201ed58c845324e5ac Mon Sep 17 00:00:00 2001 From: ejmeitz <54505069+ejmeitz@users.noreply.github.com> Date: Sat, 22 Jul 2023 11:16:00 -0400 Subject: [PATCH 03/12] start parallel read capability --- src/JLD2.jl | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/JLD2.jl b/src/JLD2.jl index a50bebc9..3e5a4e2f 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -310,9 +310,9 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, verify_compressor(compress) exists = ispath(fname) - # Can only open multiple times if mode is "r" + # Can only open multiple in parallel if mode is "r" if parallel_read && (wr, create, truncate) != (false, false, false) - throw(ArgumentError("Cannot open file in multiple threads unless mode is \"r\"")) + throw(ArgumentError("Cannot open file in a parallel context unless mode is \"r\"")) end #Do not lock file if user specifies parallel_read @@ -326,17 +326,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, throw(ArgumentError("not a regular file: $fname")) end - #Check that file is not open elsewhere in a non-read context - if parallel_read && haskey(OPEN_FILES, rname) - ref = OPEN_FILES[rname] - f = ref.value - if !isnothing(f) - f.writable && throw(ArgumentError("Cannot open file multiple times unless mode is always \"r\". File was open elsewhere in a write mode.")) - end - end - #TODO: Use dict of parallel files open - - if haskey(OPEN_FILES, rname) + if !parallel_read && haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] f = ref.value if !isnothing(f) @@ -354,6 +344,16 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, throw(ArgumentError("attempted to open file with mmaparrays=$(mmaparrays), but file was already open with mmaparrays=$(f.mmaparrays)")) end + f = f::JLDFile{iotype} + f.n_times_opened += 1 + return f + end + elseif parallel_read && haskey(OPEN_FILES, rname) + throw(ArgumentError("Tried to open file in a parallel context but it is open elsewhere in a serial context.")) + elseif parallel_read && haskey(OPEN_PARALLEL_FILES, rname) + ref = OPEN_PARALLEL_FILES[rname] + f = ref.value + if !isnothing(f) f = f::JLDFile{iotype} f.n_times_opened += 1 return f @@ -366,7 +366,11 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, created = !exists || truncate rname = realpath(fname) f = JLDFile(io, rname, wr, created, compress, mmaparrays) - OPEN_FILES[rname] = WeakRef(f) + if parallel_read + OPEN_PARALLEL_FILES[rname] = WeakRef(f) + else + OPEN_FILES[rname] = WeakRef(f) + end f catch e rethrow(e) From 4c675e36e111ca2dd4c43214318b4075b5652093 Mon Sep 17 00:00:00 2001 From: ejmeitz <54505069+ejmeitz@users.noreply.github.com> Date: Sun, 23 Jul 2023 16:05:49 -0400 Subject: [PATCH 04/12] file handle created for every parallel_read --- Manifest.toml | 173 +++++++++++++++++++++++++++++++++++++++++++++++ src/JLD2.jl | 26 +++---- test/loadsave.jl | 53 +++++++++++++++ test/test.jld2 | Bin 0 -> 5379 bytes 4 files changed, 239 insertions(+), 13 deletions(-) create mode 100644 Manifest.toml create mode 100644 test/test.jld2 diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 00000000..825094d9 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,173 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.0" +manifest_format = "2.0" +project_hash = "39130a0dfb70cea94feeb4775878868a507c8cd9" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.16.1" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.10" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.10.11" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "2e73fe17cac3c62ad1aebe70d44c963c3cfdc3e3" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.2" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.9.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.13" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" diff --git a/src/JLD2.jl b/src/JLD2.jl index 3e5a4e2f..71d9480c 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -316,7 +316,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, end #Do not lock file if user specifies parallel_read - parallel_read && lock(OPEN_FILES_LOCK) + !parallel_read && lock(OPEN_FILES_LOCK) f = try if exists @@ -325,7 +325,16 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, if !isfile(rname) throw(ArgumentError("not a regular file: $fname")) end + + #File can only be opened in parallel, or in serial as a stand alone instance + if !parallel_read && haskey(OPEN_PARALLEL_FILES, rname) + throw(ArgumentError("Cannot open file in serial context. It is open elsewhere in a parallel context.")) + end + if parallel_read && haskey(OPEN_FILES, rname) + throw(ArgumentError("Tried to open file in a parallel context but it is open elsewhere in a serial context.")) + end + # If in serial, return existing handle. In paralell always generate a new handle if !parallel_read && haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] f = ref.value @@ -344,38 +353,29 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, throw(ArgumentError("attempted to open file with mmaparrays=$(mmaparrays), but file was already open with mmaparrays=$(f.mmaparrays)")) end - f = f::JLDFile{iotype} - f.n_times_opened += 1 - return f - end - elseif parallel_read && haskey(OPEN_FILES, rname) - throw(ArgumentError("Tried to open file in a parallel context but it is open elsewhere in a serial context.")) - elseif parallel_read && haskey(OPEN_PARALLEL_FILES, rname) - ref = OPEN_PARALLEL_FILES[rname] - f = ref.value - if !isnothing(f) f = f::JLDFile{iotype} f.n_times_opened += 1 return f end end - end io = openfile(iotype, fname, wr, create, truncate, fallback) created = !exists || truncate rname = realpath(fname) f = JLDFile(io, rname, wr, created, compress, mmaparrays) + if parallel_read OPEN_PARALLEL_FILES[rname] = WeakRef(f) else OPEN_FILES[rname] = WeakRef(f) end + f catch e rethrow(e) finally - parallel_read && unlock(OPEN_FILES_LOCK) + !parallel_read && unlock(OPEN_FILES_LOCK) end if f.written f.base_address = 512 diff --git a/test/loadsave.jl b/test/loadsave.jl index 61a24659..33765b33 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -600,6 +600,59 @@ JLD2.rconvert(::Type{CR}, dsa::CRSerialized) = CR(dsa.r) end end +# Test jldsave +@testset "Multi-threaded read" begin + fn = joinpath(mktempdir(), "test.jld2") + + jldsave(fn; a=1, b=2) + + ######################### + # Valid access patterns # + ######################### + + #Normal read + jldopen(fn, "r"; parallel_read = true) do f + @test f["a"] == 1 + @test f["b"] == 2 + end + + #Parallel read -- not guranteed to read at same time, but will test two handles being open + # Threads.@spawn begin + # jldopen(fn, "r"; parallel_read = true) do f + # @test f["a"] == 1 + # @test f["b"] == 2 + # sleep(15) #pause with file open + # end + # end + # Threads.@spawn begin + # jldopen(fn, "r"; parallel_read = true) do f + # @test f["a"] == 1 + # @test f["b"] == 2 + # end + # end + + ########################### + # Invalid access patterns # + ########################### + + # Open for non-read in parallel context + @test_throws ArgumentError jldopen(fn, "w"; parallel_read = true) do f end + @test_throws ArgumentError jldopen(fn, "w+"; parallel_read = true) do f end + @test_throws ArgumentError jldopen(fn, "r+"; parallel_read = true) do f end + @test_throws ArgumentError jldopen(fn, "a+"; parallel_read = true) do f end + @test_throws ArgumentError jldopen(fn, "a"; parallel_read = true) do f end + + #Open for writing in one context, open for reading in parallel context + # @test_throws ArgumentError Threads.@threads for i in 1:100 + # jldopen(fn, "r"; parallel_read = true) do f + # @test f["a"] == 1 + # @test f["b"] == 2 + # end + # jldopen(fn, "w") do f end + # end +end + + ################################################################################################### ## `Upgrade` Tests ################################################################################################### diff --git a/test/test.jld2 b/test/test.jld2 new file mode 100644 index 0000000000000000000000000000000000000000..4be4d700c9711707a73afcb66d548dbfa368960a GIT binary patch literal 5379 zcmeHL-AWrl6h5<~VcjM)in(a7hC*otT@s0r+=#_AhLTpQE%i<&xX5a_viz(XZFm5&T5Nc3v(Sg2)4zWgKj8A zL#9$R`$2s{{rUG?&|(&IP)?kE6ds+a=)H)?}MptLq)?aM@))~hRP?JPp& zKw~;yAt+nvCxNft_6LUFsSOzQumI2FvWxz6E8Tcm{Ae~~5KO>z&sRBnRPN2wvAKEe;gL^M6LMa7C zi_k(zhGd3cPHU60R>#rLC)gN?7~BKSou68maHjzkkK$0yIG-c{{`~qqU0>4I^GYBA zW^(a7u+jPr(9o|7`SzKY2 Date: Sun, 23 Jul 2023 16:07:59 -0400 Subject: [PATCH 05/12] always lock --- src/JLD2.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/JLD2.jl b/src/JLD2.jl index 71d9480c..6305d5fb 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -315,8 +315,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, throw(ArgumentError("Cannot open file in a parallel context unless mode is \"r\"")) end - #Do not lock file if user specifies parallel_read - !parallel_read && lock(OPEN_FILES_LOCK) + lock(OPEN_FILES_LOCK) f = try if exists @@ -375,7 +374,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, catch e rethrow(e) finally - !parallel_read && unlock(OPEN_FILES_LOCK) + unlock(OPEN_FILES_LOCK) end if f.written f.base_address = 512 From f6617066204ed4820d21bf416edafb444128fa60 Mon Sep 17 00:00:00 2001 From: ejmeitz <54505069+ejmeitz@users.noreply.github.com> Date: Sun, 23 Jul 2023 17:04:51 -0400 Subject: [PATCH 06/12] remove commented tests --- test/loadsave.jl | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/test/loadsave.jl b/test/loadsave.jl index 33765b33..31e3967b 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -616,21 +616,6 @@ end @test f["b"] == 2 end - #Parallel read -- not guranteed to read at same time, but will test two handles being open - # Threads.@spawn begin - # jldopen(fn, "r"; parallel_read = true) do f - # @test f["a"] == 1 - # @test f["b"] == 2 - # sleep(15) #pause with file open - # end - # end - # Threads.@spawn begin - # jldopen(fn, "r"; parallel_read = true) do f - # @test f["a"] == 1 - # @test f["b"] == 2 - # end - # end - ########################### # Invalid access patterns # ########################### @@ -642,14 +627,6 @@ end @test_throws ArgumentError jldopen(fn, "a+"; parallel_read = true) do f end @test_throws ArgumentError jldopen(fn, "a"; parallel_read = true) do f end - #Open for writing in one context, open for reading in parallel context - # @test_throws ArgumentError Threads.@threads for i in 1:100 - # jldopen(fn, "r"; parallel_read = true) do f - # @test f["a"] == 1 - # @test f["b"] == 2 - # end - # jldopen(fn, "w") do f end - # end end From 6f11910a4aa0413d9b5b0540238166f5e26ba4b7 Mon Sep 17 00:00:00 2001 From: Ethan Meitz <54505069+ejmeitz@users.noreply.github.com> Date: Sun, 23 Jul 2023 17:03:24 -0400 Subject: [PATCH 07/12] Delete Manifest.toml --- Manifest.toml | 173 -------------------------------------------------- 1 file changed, 173 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index 825094d9..00000000 --- a/Manifest.toml +++ /dev/null @@ -1,173 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.9.0" -manifest_format = "2.0" -project_hash = "39130a0dfb70cea94feeb4775878868a507c8cd9" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" - -[[deps.Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[deps.Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[deps.Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.FileIO]] -deps = ["Pkg", "Requires", "UUIDs"] -git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" -uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.16.1" - -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" - -[[deps.InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.3" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.84.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.10.2+0" - -[[deps.Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.10" - -[[deps.Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+0" - -[[deps.Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2022.10.11" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OrderedCollections]] -git-tree-sha1 = "2e73fe17cac3c62ad1aebe70d44c963c3cfdc3e3" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.6.2" - -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.9.0" - -[[deps.Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[deps.Random]] -deps = ["SHA", "Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[deps.Reexport]] -git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.2.2" - -[[deps.Requires]] -deps = ["UUIDs"] -git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.3.0" - -[[deps.SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" -version = "0.7.0" - -[[deps.Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[deps.Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[deps.TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -version = "1.0.3" - -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[deps.TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.13" - -[[deps.UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[deps.Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[deps.Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.13+0" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.48.0+0" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+0" From f3078580b90ce4a0607a331dbeed490d5365cdef Mon Sep 17 00:00:00 2001 From: ejmeitz <54505069+ejmeitz@users.noreply.github.com> Date: Sun, 23 Jul 2023 17:07:22 -0400 Subject: [PATCH 08/12] delete temp file intests --- test/loadsave.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/loadsave.jl b/test/loadsave.jl index 31e3967b..6ee4f8e4 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -627,6 +627,8 @@ end @test_throws ArgumentError jldopen(fn, "a+"; parallel_read = true) do f end @test_throws ArgumentError jldopen(fn, "a"; parallel_read = true) do f end + + rm(fn; force = true, recursive = true) end From 67be4d276794a5e9100e65c9f838d020e1ff4e6b Mon Sep 17 00:00:00 2001 From: Jonas Isensee Date: Wed, 26 Jul 2023 08:34:26 +0200 Subject: [PATCH 09/12] fixes & changelog --- .gitignore | 5 ++++- CHANGELOG.md | 3 ++- Project.toml | 2 +- src/JLD2.jl | 52 ++++++++++++++++++++++++---------------------------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.gitignore b/.gitignore index 7868cd45..fbdf7513 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ *.jl.cov *.jl.mem - +*.jld2 +*.h5 +*.nc +*.jld /test/test_out.jld docs/build/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index edf5f2b4..58fb61c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ -## 0.4.35 +## 0.4.33 - fix `Upgrade` for parametric types - new type reconstruction when matching DataType cannot be found (eval-free) + - new `parallel_read` keyword for creating stand-alone file handles for multithreaded file reading (@ejmeitz) ## 0.4.32 - add experimental `JLD2.readas` function for customized reading of custom serialized objects (#468) diff --git a/Project.toml b/Project.toml index 5bf50fd9..305f31e3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "JLD2" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.32" +version = "0.4.33" [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" diff --git a/src/JLD2.jl b/src/JLD2.jl index 6305d5fb..ec1dd9b5 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -297,7 +297,6 @@ FallbackType(::Type{IOStream}) = nothing read_bytestring(io::IOStream) = String(readuntil(io, 0x00)) const OPEN_FILES = Dict{String,WeakRef}() -const OPEN_PARALLEL_FILES = Dict{String,WeakRef}() #these files are read-only const OPEN_FILES_LOCK = ReentrantLock() function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, iotype::T=MmapIO; fallback::Union{Type, Nothing} = FallbackType(iotype), @@ -321,40 +320,39 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, if exists rname = realpath(fname) # catch existing file system entities that are not regular files - if !isfile(rname) - throw(ArgumentError("not a regular file: $fname")) - end + !isfile(rname) && throw(ArgumentError("not a regular file: $fname")) #File can only be opened in parallel, or in serial as a stand alone instance - if !parallel_read && haskey(OPEN_PARALLEL_FILES, rname) - throw(ArgumentError("Cannot open file in serial context. It is open elsewhere in a parallel context.")) - end if parallel_read && haskey(OPEN_FILES, rname) throw(ArgumentError("Tried to open file in a parallel context but it is open elsewhere in a serial context.")) end - # If in serial, return existing handle. In paralell always generate a new handle - if !parallel_read && haskey(OPEN_FILES, rname) + # If in serial, return existing handle. In parallel always generate a new handle + if haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] f = ref.value if !isnothing(f) - if truncate - throw(ArgumentError("attempted to truncate a file that was already open")) - elseif !isa(f, JLDFile{iotype}) - throw(ArgumentError("attempted to open file with $iotype backend, but already open with a different backend")) - elseif f.writable != wr - current = wr ? "read/write" : "read-only" - previous = f.writable ? "read/write" : "read-only" - throw(ArgumentError("attempted to open file $(current), but file was already open $(previous)")) - elseif f.compress != compress - throw(ArgumentError("attempted to open file with compress=$(compress), but file was already open with compress=$(f.compress)")) - elseif f.mmaparrays != mmaparrays - throw(ArgumentError("attempted to open file with mmaparrays=$(mmaparrays), but file was already open with mmaparrays=$(f.mmaparrays)")) + if parallel_read + f.writable && throw(ArgumentError("Tried to open file in a parallel context but it is open in write-mode elsewhere in a serial context.")) + else + if truncate + throw(ArgumentError("attempted to truncate a file that was already open")) + elseif !isa(f, JLDFile{iotype}) + throw(ArgumentError("attempted to open file with $iotype backend, but already open with a different backend")) + elseif f.writable != wr + current = wr ? "read/write" : "read-only" + previous = f.writable ? "read/write" : "read-only" + throw(ArgumentError("attempted to open file $(current), but file was already open $(previous)")) + elseif f.compress != compress + throw(ArgumentError("attempted to open file with compress=$(compress), but file was already open with compress=$(f.compress)")) + elseif f.mmaparrays != mmaparrays + throw(ArgumentError("attempted to open file with mmaparrays=$(mmaparrays), but file was already open with mmaparrays=$(f.mmaparrays)")) + end + + f = f::JLDFile{iotype} + f.n_times_opened += 1 + return f end - - f = f::JLDFile{iotype} - f.n_times_opened += 1 - return f end end end @@ -364,9 +362,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, rname = realpath(fname) f = JLDFile(io, rname, wr, created, compress, mmaparrays) - if parallel_read - OPEN_PARALLEL_FILES[rname] = WeakRef(f) - else + if !parallel_read OPEN_FILES[rname] = WeakRef(f) end From 3849661536281704cc227033339d8796ff9043d0 Mon Sep 17 00:00:00 2001 From: Jonas Isensee Date: Wed, 26 Jul 2023 08:47:13 +0200 Subject: [PATCH 10/12] slightly refined tests and relaxed condition --- src/JLD2.jl | 5 ----- test/loadsave.jl | 12 ++++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/JLD2.jl b/src/JLD2.jl index ec1dd9b5..afbb9f0c 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -322,11 +322,6 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool, # catch existing file system entities that are not regular files !isfile(rname) && throw(ArgumentError("not a regular file: $fname")) - #File can only be opened in parallel, or in serial as a stand alone instance - if parallel_read && haskey(OPEN_FILES, rname) - throw(ArgumentError("Tried to open file in a parallel context but it is open elsewhere in a serial context.")) - end - # If in serial, return existing handle. In parallel always generate a new handle if haskey(OPEN_FILES, rname) ref = OPEN_FILES[rname] diff --git a/test/loadsave.jl b/test/loadsave.jl index 6ee4f8e4..8dced5bd 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -614,8 +614,20 @@ end jldopen(fn, "r"; parallel_read = true) do f @test f["a"] == 1 @test f["b"] == 2 + @test fn ∉ keys(JLD2.OPEN_FILES) end + # Can read in parallel and serial (read-only) + f1 = jldopen(fn) + f2 = jldopen(fn; parallel_read = true) + @test JLD2.OPEN_FILES[fn] == f1 + @test f1 != f2 + close(f1); close(f2) + + f1 = jldopen(fn, "a") + @test_throws ArgumentError jldopen(fn; parallel_read = true) + close(f1) + ########################### # Invalid access patterns # ########################### From 015037c6f4d5deb17bcd37b04ac37b9742e2076c Mon Sep 17 00:00:00 2001 From: Jonas Isensee Date: Wed, 26 Jul 2023 08:48:35 +0200 Subject: [PATCH 11/12] remove accidental test.jld2 --- test/test.jld2 | Bin 5379 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test/test.jld2 diff --git a/test/test.jld2 b/test/test.jld2 deleted file mode 100644 index 4be4d700c9711707a73afcb66d548dbfa368960a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5379 zcmeHL-AWrl6h5<~VcjM)in(a7hC*otT@s0r+=#_AhLTpQE%i<&xX5a_viz(XZFm5&T5Nc3v(Sg2)4zWgKj8A zL#9$R`$2s{{rUG?&|(&IP)?kE6ds+a=)H)?}MptLq)?aM@))~hRP?JPp& zKw~;yAt+nvCxNft_6LUFsSOzQumI2FvWxz6E8Tcm{Ae~~5KO>z&sRBnRPN2wvAKEe;gL^M6LMa7C zi_k(zhGd3cPHU60R>#rLC)gN?7~BKSou68maHjzkkK$0yIG-c{{`~qqU0>4I^GYBA zW^(a7u+jPr(9o|7`SzKY2 Date: Wed, 26 Jul 2023 08:56:26 +0200 Subject: [PATCH 12/12] use realpath --- test/loadsave.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/loadsave.jl b/test/loadsave.jl index 8dced5bd..73e221d0 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -620,7 +620,7 @@ end # Can read in parallel and serial (read-only) f1 = jldopen(fn) f2 = jldopen(fn; parallel_read = true) - @test JLD2.OPEN_FILES[fn] == f1 + @test JLD2.OPEN_FILES[realpath(fn)] == f1 @test f1 != f2 close(f1); close(f2)