From 500aa93121850d97c1c4ad6a5169f94c63d00338 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 2 Nov 2017 11:04:16 -0700 Subject: [PATCH 01/26] initial support for rds files --- src/RData.jl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/RData.jl b/src/RData.jl index 1dd7a42..57e3e60 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -11,7 +11,8 @@ import FileIO: load export sexp2julia, DictoVec, - load # export FileIO.load() + load, # export FileIO.load() + readRDS include("config.jl") include("sxtypes.jl") @@ -80,4 +81,20 @@ end load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) +# TODO: +# * maybe throw error instead of warning on conversion? +# * tests +# * load stuff (e.g. FileIO req on detect_rdata) +# * maybe return tuple of (object, attribute_dict) for +# https://github.com/JuliaStats/RData.jl/issues/30 +function readRDS(f::AbstractString) + obj = gzopen(f) do io + ctx = RDAContext(rdaio(io, chomp(readline(io)))) #, kwoptions) + @assert ctx.fmtver == 2 # format version + #convert2julia = get(ctx.kwdict,:convert,true) + return readitem(ctx) + end + return sexp2julia(obj) +end + end # module From d9dc2ecfb4049b1a03056c983b9217759347c67b Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 2 Nov 2017 14:09:52 -0700 Subject: [PATCH 02/26] add support for keyword arguments (can at least manually handle list of data frames now) --- src/RData.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 57e3e60..da6f0f8 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -87,14 +87,14 @@ load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) # * load stuff (e.g. FileIO req on detect_rdata) # * maybe return tuple of (object, attribute_dict) for # https://github.com/JuliaStats/RData.jl/issues/30 -function readRDS(f::AbstractString) +function readRDS(f::AbstractString; kwoptions...) 
obj = gzopen(f) do io - ctx = RDAContext(rdaio(io, chomp(readline(io)))) #, kwoptions) + ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) @assert ctx.fmtver == 2 # format version - #convert2julia = get(ctx.kwdict,:convert,true) - return readitem(ctx) + convert2julia = get(ctx.kwdict,:convert,true) + return convert2julia ? sexp2julia(readitem(ctx)) : readitem(ctx) end - return sexp2julia(obj) + return obj end end # module From 97d47a72761cae1f3f6396fbe014fafadc9209a5 Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:05:14 -0800 Subject: [PATCH 03/26] added tests and test data --- test/RDS.jl | 56 ++++++++++++++++++++++++++++++++++++ test/data/NAs.rds | Bin 0 -> 236 bytes test/data/NAs_ascii.rds | Bin 0 -> 232 bytes test/data/closures.rds | Bin 0 -> 334 bytes test/data/cmpfun.rds | Bin 0 -> 339 bytes test/data/envs.rds | Bin 0 -> 172 bytes test/data/envs_ascii.rds | Bin 0 -> 147 bytes test/data/minimal.rds | Bin 0 -> 132 bytes test/data/minimal_ascii.rds | Bin 0 -> 132 bytes test/data/names.rds | Bin 0 -> 239 bytes test/data/names_ascii.rds | Bin 0 -> 236 bytes test/data/pairlists.rds | Bin 0 -> 119 bytes test/data/types.rds | Bin 0 -> 220 bytes test/data/types_ascii.rds | Bin 0 -> 220 bytes test/generate_rda.R | 12 ++++++++ test/runtests.jl | 1 + 16 files changed, 69 insertions(+) create mode 100644 test/RDS.jl create mode 100644 test/data/NAs.rds create mode 100644 test/data/NAs_ascii.rds create mode 100644 test/data/closures.rds create mode 100644 test/data/cmpfun.rds create mode 100644 test/data/envs.rds create mode 100644 test/data/envs_ascii.rds create mode 100644 test/data/minimal.rds create mode 100644 test/data/minimal_ascii.rds create mode 100644 test/data/names.rds create mode 100644 test/data/names_ascii.rds create mode 100644 test/data/pairlists.rds create mode 100644 test/data/types.rds create mode 100644 test/data/types_ascii.rds diff --git a/test/RDS.jl b/test/RDS.jl new file mode 100644 index 0000000..735175b --- 
/dev/null +++ b/test/RDS.jl @@ -0,0 +1,56 @@ +module TestRDS + using Base.Test + using DataFrames + using RData + using Compat + + # think this is redundant for rds vs rda + # check for Float64 NA + # @test !RData.isna_float64(reinterpret(UInt64, 1.0)) + # @test !RData.isna_float64(reinterpret(UInt64, NaN)) + # @test !RData.isna_float64(reinterpret(UInt64, Inf)) + # @test !RData.isna_float64(reinterpret(UInt64, -Inf)) + # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) + # # check that alternative NA is also recognized (#10) + # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) + + testdir = dirname(@__FILE__) + + df = DataFrame(num = [1.1, 2.2]) + @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) + @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) + @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) + + df[:int] = Int32[1, 2] + df[:logi] = [true, false] + df[:chr] = ["ab", "c"] + df[:factor] = pool(df[:chr]) + df[:cplx] = Complex128[1.1+0.5im, 1.0im] + @test isequal(sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"], df) + @test isequal(sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"], df) + + df[2, :] = NA + append!(df, df[2, :]) + df[3, :num] = NaN + df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) + # ASCII format saves NaN as NA + df[3, :num] = NA + df[:, :cplx] = @data [NA, NA, NA] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) + + rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) + expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] + @test rds_names == expected_names + rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) + @test rds_names == 
[:_end, :x!, :x1, :_B_C_, :x, :x_1] + + rds_envs = readRDS("$testdir/data/envs.rds",convert=false) + + rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) + + rds_closures = readRDS("$testdir/data/closures.rds",convert=false) + + rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) +end + diff --git a/test/data/NAs.rds b/test/data/NAs.rds new file mode 100644 index 0000000000000000000000000000000000000000..96cdad15630a547a37cf310ecee2221269454ef7 GIT binary patch literal 236 zcmVR6u0H?E$K0@cS+nMZgYVihx>*)^8hV)J~4Ti>W zaR)bfb0I`DL@Y2yPqgpJJ38T3w2i6it8AaC%JyPEy$lMZSJ0D6R4)J1B^RYHtqdoGrNbc0h i>|m{TX1o%ONOFxE&8bus`c-U|Q}zv0O0BpB0ssK7{BWfJ literal 0 HcmV?d00001 diff --git a/test/data/closures.rds b/test/data/closures.rds new file mode 100644 index 0000000000000000000000000000000000000000..818ca3b5f8fa16d5c5ed804d8c5ea472134e17d3 GIT binary patch literal 334 zcmV-U0kQrciwFP!000001I?37OT$1Ah9@6QoA`ks-gBumq=@NV`~#&o!D|T{2*gy9 z6q0`}UW=}?&1_629#jw<*v#y^yEC&-*hdclOt5SdOiP8A_`$%zff|4THn`}WvWN?j z;Nlt1X2%tPEAk=5AKe{c=z0l??NdY$x3vv zbRl>n@AP-I-5;xUJ2iMi{tUG#$82rh*9d3S$60qg>oC$dCp-tk*WLXOhz)*^QRCa5Y#LsKYpV4c}_cV}iF**6;ibTCXEbVG!5{9s_=Kn*|x6Q<~`EDcf? z;^H;VCe;eS9eIypkLH?kdW@&TC}tR%P}_&HWfVsV%P_cYi7SUJXIcIb9+OAs$w3$7 z(LPzaPRjCQnk-*lHyhVh5>7TZPGMj5=r$0<+e|i~lXyjq2m3?WzYC?XxKg5l zB@4kPd8@sv?!Kzl?3Ca&ei>?=_t~0iUp<_D8)wn*EIeQ09H}`Je9hhefLQ1EXa#;w zXN+S`;MzzvuNJ3r@kWTx?;!1`lou2)enI(OrM&J?UX`v>c;!FgTah}ctjjKIn|HC! 
lI1IcW@z&(~F-V+M41(Q3yxjrKAJEn!`8TTg)iRj`007Ocp&tMM literal 0 HcmV?d00001 diff --git a/test/data/envs.rds b/test/data/envs.rds new file mode 100644 index 0000000000000000000000000000000000000000..52bfaa55d2750b7328b97acba4d06f7230c51d4d GIT binary patch literal 172 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?&0-FsS4yY^qV?Ux$7^yLvgU6)5u~9IC zOTEL7MW{xhtvQ?dN=y>3lGI@tpM{4ro7ivg>ilO<+~IKd@Q?X5-xsKDQelfw`kc_f zn8l{FDRoU{=gyT!9oiT=n1y|=c%Rj7F8UE%%oli&yhPcW9VkiWoFpM9diFE69WSPVemD- literal 0 HcmV?d00001 diff --git a/test/data/minimal.rds b/test/data/minimal.rds new file mode 100644 index 0000000000000000000000000000000000000000..c50c234f22aad0d76a7ce7fb032320ea36674c94 GIT binary patch literal 132 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?o%*@79(jG8#-m0I&$+THULeYeoIl$Iq zUTdSU$L43M&z6ZXOEP&hovB>9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h0B|Z`-U|{J`i4!2fCD>5tXDUT%D_|?v zfRPiHjfrA637@Ez?7aB@yzu~l0e2ZNPH>9G0pRrZQAenJ`M8qYsv3VVxK~dwGNd@w zT+m#U&+G6_TAvGohR7XS(V%=^-q8uSqHR=FKaG!ZTIgQvrDmbF^hu>4`G)sZ>HbZoeKaAxXFNVf(tYT04Hyu&QN*yx|2Mv4ZgsBYL8}0ajLnY zAw_XV{=Dwb1c!!*16rp=`FcLm3AdtcWR;aArzERXFZR<*A|o0UOrlIwI=}Cdil{D1 zL~qpj7`w2ME03^M@;AmQi*t~RV^XPeFh!XU_t8mScU$?%lRVZ0mKv-Tuvhzy?`-}4 mN-BPlomo2|=f=WtLRHmF}Ii2 U!kT4GG84iWs*VdeJ_FhZ08|4dHUIzs literal 0 HcmV?d00001 diff --git a/test/data/types.rds b/test/data/types.rds new file mode 100644 index 0000000000000000000000000000000000000000..e6c2ff545ecfc63952c3c0020436bd47cc981ef2 GIT binary patch literal 220 zcmV<203-h&iwFP!000001AS0k4uUWYF54u5L?!VaTmg^a>l~mY1I)mmN3Wr$^9XtY z=6;xgJ=moCzII>Rb-Ne<5MY7;VNEyc2mr9gQ29zHv(H1@a~oJxxuvNyT7&T<)|Qx< zK%|lGM;0*E+AAfNTS3T+OToLp4Q2%k#%bsDzb++ZeJd0jl&_~NWG|Rm_I7O6HWKYM zZD?I|Zr8gyq?Cg{--#0QVK#}IbM0u=^dlv_Jyb5uj>3EFC}JmmvonzN){1l5)3g+7 WqFqg^h_6Cl~mY1I)mmN3Wr$^9XtY z=6;xgJ=moCzII>Rb-Ne<5MY7;VNEyc2mr9gQ29zHv(H1@a~oJxxuvNyT7&T<)|Qx< zK%|lGM;0*E+AAfNTS3T+OToLp4Q2%k#%bsDzb++ZeJd0jl&_~NWG|Rm_I7O6HWKYM zZD?I|Zr8gyq?Cg{--#0QVK#}IbM0u=^dlv_Jyb5uj>3EFC}JmmvonzN){1l5)3g+7 WqFqg^h_6C 
Date: Mon, 20 Nov 2017 18:11:48 -0800 Subject: [PATCH 04/26] update comments in generate_rda --- test/generate_rda.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/generate_rda.R b/test/generate_rda.R index 788595e..a1a7bba 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -1,4 +1,4 @@ -# R script to generate test .rda files +# R script to generate test .rda and .rds files df <- data.frame(num = c(1.1, 2.2)) save(df, file = "data/minimal.rda") @@ -50,7 +50,7 @@ test.cmpfun2 <- cmpfun( test.fun2 ) save(test.cmpfun0, test.cmpfun1, test.cmpfun2, file = "data/cmpfun.rda") -# for converting rda files to rds +# for converting rda files to rds to test with readRDS rdafiles = list.files("data/", pattern="*.rda", full.names=T) for (rdafile in rdafiles) { en = new.env() From de6e1875ab63441a4f9ed71003bec6759e055e1c Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:57:56 -0800 Subject: [PATCH 05/26] update readRDS to use new CodecZlib library --- src/RData.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 5d60620..5645b29 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -95,13 +95,21 @@ load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) # * maybe return tuple of (object, attribute_dict) for # https://github.com/JuliaStats/RData.jl/issues/30 function readRDS(f::AbstractString; kwoptions...) - obj = gzopen(f) do io + io = open(f, "r") + try + gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B # check GZip magic number + seekstart(io) + # if compressed, transcode gzipped stream + gzipped && (io = GzipDecompressorStream(io)) ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) @assert ctx.fmtver == 2 # format version convert2julia = get(ctx.kwdict,:convert,true) return convert2julia ? 
sexp2julia(readitem(ctx)) : readitem(ctx) + catch + rethrow() + finally + close(io) end - return obj end end # module From 4c2edd4e788a5e88ea07e0ed96c578aab865fc3d Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 18:58:13 -0800 Subject: [PATCH 06/26] update RDS tests to use testsets --- test/RDS.jl | 103 ++++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/test/RDS.jl b/test/RDS.jl index 735175b..e7f82fd 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -2,55 +2,64 @@ module TestRDS using Base.Test using DataFrames using RData - using Compat - - # think this is redundant for rds vs rda - # check for Float64 NA - # @test !RData.isna_float64(reinterpret(UInt64, 1.0)) - # @test !RData.isna_float64(reinterpret(UInt64, NaN)) - # @test !RData.isna_float64(reinterpret(UInt64, Inf)) - # @test !RData.isna_float64(reinterpret(UInt64, -Inf)) - # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) - # # check that alternative NA is also recognized (#10) - # @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) testdir = dirname(@__FILE__) - df = DataFrame(num = [1.1, 2.2]) - @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) - @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) - @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) - - df[:int] = Int32[1, 2] - df[:logi] = [true, false] - df[:chr] = ["ab", "c"] - df[:factor] = pool(df[:chr]) - df[:cplx] = Complex128[1.1+0.5im, 1.0im] - @test isequal(sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"], df) - @test isequal(sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"], df) - - df[2, :] = NA - append!(df, df[2, :]) - df[3, :num] = NaN - df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN] - @test 
isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) - # ASCII format saves NaN as NA - df[3, :num] = NA - df[:, :cplx] = @data [NA, NA, NA] - @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) - - rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) - expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] - @test rds_names == expected_names - rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) - @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] - - rds_envs = readRDS("$testdir/data/envs.rds",convert=false) - - rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) - - rds_closures = readRDS("$testdir/data/closures.rds",convert=false) - - rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) + @testset "RDS: Reading minimal rds" begin + df = DataFrame(num = [1.1, 2.2]) + @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) + @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) + @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) + end + + @testset "RDS: Conversion to Julia types" begin + df = DataFrame(num = [1.1, 2.2], + int = Int32[1, 2], + logi = [true, false], + chr = ["ab", "c"], + factor = pool(["ab", "c"]), + cplx = Complex128[1.1+0.5im, 1.0im]) + rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"] + @test eltypes(rdf) == eltypes(df) + @test isequal(rdf, df) + rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"] + @test eltypes(rdf_ascii) == eltypes(df) + @test isequal(rdf_ascii, df) + end + + + @testset "RDS: NAs conversion" begin + df = DataFrame(num = [1.1, 2.2], + int = Int32[1, 2], + logi = [true, false], + chr = ["ab", "c"], + factor = pool(["ab", "c"]), + cplx = Complex128[1.1+0.5im, 1.0im]) + + df[2, :] = NA + append!(df, df[2, :]) + df[3, :num] = NaN + df[:, 
:cplx] = @data [NA, Complex128(1,NaN), NaN] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) + # ASCII format saves NaN as NA + df[3, :num] = NA + df[:, :cplx] = @data [NA, NA, NA] + @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) + end + + @testset "RDS: Column names conversion" begin + rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) + expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] + @test rds_names == expected_names + rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) + @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] + end + + @testset "RDS: Reading RDA with complex types (environments, closures etc)" begin + rds_envs = readRDS("$testdir/data/envs.rds",convert=false) + rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) + rds_closures = readRDS("$testdir/data/closures.rds",convert=false) + rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) + end end From b676c468b5dfe2b4d5a1e4fae1bfd298ed4335f7 Mon Sep 17 00:00:00 2001 From: James Sams Date: Mon, 20 Nov 2017 21:43:08 -0800 Subject: [PATCH 07/26] remove todo comments --- src/RData.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 5645b29..df4ef3a 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -88,12 +88,6 @@ end load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) -# TODO: -# * maybe throw error instead of warning on conversion? -# * tests -# * load stuff (e.g. FileIO req on detect_rdata) -# * maybe return tuple of (object, attribute_dict) for -# https://github.com/JuliaStats/RData.jl/issues/30 function readRDS(f::AbstractString; kwoptions...) 
io = open(f, "r") try From d0fbcdcefee5926b7322058a6d2a0f2a17364e83 Mon Sep 17 00:00:00 2001 From: James Sams Date: Tue, 21 Nov 2017 16:27:33 -0800 Subject: [PATCH 08/26] factor out decompress function --- src/RData.jl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index df4ef3a..4cb5562 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -41,13 +41,20 @@ include("readers.jl") ## ############################################################################## +function decompress(io) + # check GZip magic number + gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B + seekstart(io) + if gzipped + io = GzipDecompressorStream(io) + end + return io +end + function load(f::File{format"RData"}; kwoptions...) io = open(filename(f), "r") try - gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B # check GZip magic number - seekstart(io) - # if compressed, transcode gzipped stream - gzipped && (io = GzipDecompressorStream(io)) + io = decompress(io) return load(Stream(f, io), kwoptions) catch rethrow() @@ -91,10 +98,7 @@ load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) function readRDS(f::AbstractString; kwoptions...) 
io = open(f, "r") try - gzipped = read(io, UInt8) == 0x1F && read(io, UInt8) == 0x8B # check GZip magic number - seekstart(io) - # if compressed, transcode gzipped stream - gzipped && (io = GzipDecompressorStream(io)) + io = decompress(io) ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) @assert ctx.fmtver == 2 # format version convert2julia = get(ctx.kwdict,:convert,true) From 363d47c20513608d2103042d73ecdafa8a10861d Mon Sep 17 00:00:00 2001 From: James Sams Date: Tue, 21 Nov 2017 16:39:19 -0800 Subject: [PATCH 09/26] minimize testing of rds files --- test/RDS.jl | 49 ++++-------------------------------- test/data/NAs.rds | Bin 236 -> 0 bytes test/data/NAs_ascii.rds | Bin 232 -> 0 bytes test/data/closures.rds | Bin 334 -> 0 bytes test/data/cmpfun.rds | Bin 339 -> 0 bytes test/data/envs.rds | Bin 172 -> 0 bytes test/data/envs_ascii.rds | Bin 147 -> 0 bytes test/data/minimal.rds | Bin 132 -> 0 bytes test/data/minimal_ascii.rds | Bin 132 -> 0 bytes test/data/names.rds | Bin 239 -> 0 bytes test/data/names_ascii.rds | Bin 236 -> 0 bytes test/data/pairlists.rds | Bin 119 -> 0 bytes test/data/types.rds | Bin 220 -> 210 bytes test/data/types_ascii.rds | Bin 220 -> 198 bytes test/data/types_decomp.rds | Bin 0 -> 436 bytes test/generate_rda.R | 14 +++-------- 16 files changed, 8 insertions(+), 55 deletions(-) delete mode 100644 test/data/NAs.rds delete mode 100644 test/data/NAs_ascii.rds delete mode 100644 test/data/closures.rds delete mode 100644 test/data/cmpfun.rds delete mode 100644 test/data/envs.rds delete mode 100644 test/data/envs_ascii.rds delete mode 100644 test/data/minimal.rds delete mode 100644 test/data/minimal_ascii.rds delete mode 100644 test/data/names.rds delete mode 100644 test/data/names_ascii.rds delete mode 100644 test/data/pairlists.rds create mode 100644 test/data/types_decomp.rds diff --git a/test/RDS.jl b/test/RDS.jl index e7f82fd..007434b 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -5,13 +5,6 @@ module TestRDS testdir = 
dirname(@__FILE__) - @testset "RDS: Reading minimal rds" begin - df = DataFrame(num = [1.1, 2.2]) - @test isequal(sexp2julia(readRDS("$testdir/data/minimal.rds",convert=false))["df"], df) - @test isequal(readRDS("$testdir/data/minimal.rds",convert=true)["df"], df) - @test isequal(readRDS("$testdir/data/minimal_ascii.rds")["df"], df) - end - @testset "RDS: Conversion to Julia types" begin df = DataFrame(num = [1.1, 2.2], int = Int32[1, 2], @@ -19,47 +12,15 @@ module TestRDS chr = ["ab", "c"], factor = pool(["ab", "c"]), cplx = Complex128[1.1+0.5im, 1.0im]) - rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false))["df"] + rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false)) @test eltypes(rdf) == eltypes(df) @test isequal(rdf, df) - rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false))["df"] + rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false)) @test eltypes(rdf_ascii) == eltypes(df) @test isequal(rdf_ascii, df) - end - - - @testset "RDS: NAs conversion" begin - df = DataFrame(num = [1.1, 2.2], - int = Int32[1, 2], - logi = [true, false], - chr = ["ab", "c"], - factor = pool(["ab", "c"]), - cplx = Complex128[1.1+0.5im, 1.0im]) - - df[2, :] = NA - append!(df, df[2, :]) - df[3, :num] = NaN - df[:, :cplx] = @data [NA, Complex128(1,NaN), NaN] - @test isequal(sexp2julia(readRDS("$testdir/data/NAs.rds",convert=false))["df"], df) - # ASCII format saves NaN as NA - df[3, :num] = NA - df[:, :cplx] = @data [NA, NA, NA] - @test isequal(sexp2julia(readRDS("$testdir/data/NAs_ascii.rds",convert=false))["df"], df) - end - - @testset "RDS: Column names conversion" begin - rds_names = names(sexp2julia(readRDS("$testdir/data/names.rds",convert=false))["df"]) - expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] - @test rds_names == expected_names - rds_names = names(sexp2julia(readRDS("$testdir/data/names_ascii.rds",convert=false))["df"]) - @test rds_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] - end - - 
@testset "RDS: Reading RDA with complex types (environments, closures etc)" begin - rds_envs = readRDS("$testdir/data/envs.rds",convert=false) - rds_pairlists = readRDS("$testdir/data/pairlists.rds",convert=false) - rds_closures = readRDS("$testdir/data/closures.rds",convert=false) - rds_cmpfuns = readRDS("$testdir/data/cmpfun.rds",convert=false) + rdf_decomp = sexp2julia(readRDS("$testdir/data/types_decomp.rds",convert=false)) + @test eltypes(rdf_decomp) == eltypes(df) + @test isequal(rdf_decomp, df) end end diff --git a/test/data/NAs.rds b/test/data/NAs.rds deleted file mode 100644 index 96cdad15630a547a37cf310ecee2221269454ef7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 236 zcmVR6u0H?E$K0@cS+nMZgYVihx>*)^8hV)J~4Ti>W zaR)bfb0I`DL@Y2yPqgpJJ38T3w2i6it8AaC%JyPEy$lMZSJ0D6R4)J1B^RYHtqdoGrNbc0h i>|m{TX1o%ONOFxE&8bus`c-U|Q}zv0O0BpB0ssK7{BWfJ diff --git a/test/data/closures.rds b/test/data/closures.rds deleted file mode 100644 index 818ca3b5f8fa16d5c5ed804d8c5ea472134e17d3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 334 zcmV-U0kQrciwFP!000001I?37OT$1Ah9@6QoA`ks-gBumq=@NV`~#&o!D|T{2*gy9 z6q0`}UW=}?&1_629#jw<*v#y^yEC&-*hdclOt5SdOiP8A_`$%zff|4THn`}WvWN?j z;Nlt1X2%tPEAk=5AKe{c=z0l??NdY$x3vv zbRl>n@AP-I-5;xUJ2iMi{tUG#$82rh*9d3S$60qg>oC$dCp-tk*WLXOhz)*^QRCa5Y#LsKYpV4c}_cV}iF**6;ibTCXEbVG!5{9s_=Kn*|x6Q<~`EDcf? z;^H;VCe;eS9eIypkLH?kdW@&TC}tR%P}_&HWfVsV%P_cYi7SUJXIcIb9+OAs$w3$7 z(LPzaPRjCQnk-*lHyhVh5>7TZPGMj5=r$0<+e|i~lXyjq2m3?WzYC?XxKg5l zB@4kPd8@sv?!Kzl?3Ca&ei>?=_t~0iUp<_D8)wn*EIeQ09H}`Je9hhefLQ1EXa#;w zXN+S`;MzzvuNJ3r@kWTx?;!1`lou2)enI(OrM&J?UX`v>c;!FgTah}ctjjKIn|HC! 
lI1IcW@z&(~F-V+M41(Q3yxjrKAJEn!`8TTg)iRj`007Ocp&tMM diff --git a/test/data/envs.rds b/test/data/envs.rds deleted file mode 100644 index 52bfaa55d2750b7328b97acba4d06f7230c51d4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 172 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?&0-FsS4yY^qV?Ux$7^yLvgU6)5u~9IC zOTEL7MW{xhtvQ?dN=y>3lGI@tpM{4ro7ivg>ilO<+~IKd@Q?X5-xsKDQelfw`kc_f zn8l{FDRoU{=gyT!9oiT=n1y|=c%Rj7F8UE%%oli&yhPcW9VkiWoFpM9diFE69WSPVemD- diff --git a/test/data/minimal.rds b/test/data/minimal.rds deleted file mode 100644 index c50c234f22aad0d76a7ce7fb032320ea36674c94..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 132 zcmb2|=3oE==I#ec2?+^F32BK*2}x{*t&J?o%*@79(jG8#-m0I&$+THULeYeoIl$Iq zUTdSU$L43M&z6ZXOEP&hovB>9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h9Gvk#-_ovfe6S68D*~}R_{{w-uxq+qnMv1!)>!uu; h0B|Z`-U|{J`i4!2fCD>5tXDUT%D_|?v zfRPiHjfrA637@Ez?7aB@yzu~l0e2ZNPH>9G0pRrZQAenJ`M8qYsv3VVxK~dwGNd@w zT+m#U&+G6_TAvGohR7XS(V%=^-q8uSqHR=FKaG!ZTIgQvrDmbF^hu>4`G)sZ>HbZoeKaAxXFNVf(tYT04Hyu&QN*yx|2Mv4ZgsBYL8}0ajLnY zAw_XV{=Dwb1c!!*16rp=`FcLm3AdtcWR;aArzERXFZR<*A|o0UOrlIwI=}Cdil{D1 zL~qpj7`w2ME03^M@;AmQi*t~RV^XPeFh!XU_t8mScU$?%lRVZ0mKv-Tuvhzy?`-}4 mN-BPlomo2|=f=WtLRHmF}Ii2 U!kT4GG84iWs*VdeJ_FhZ08|4dHUIzs diff --git a/test/data/types.rds b/test/data/types.rds index e6c2ff545ecfc63952c3c0020436bd47cc981ef2..d13b9299e9489144428ee1fdcb8749931a08d373 100644 GIT binary patch delta 193 zcmV;y06zcR0n!1G8Gl{@kKyZ_pdAU+8}Nd1!lX0+X>fANT-)&GD$#mJBt6NN2vYC=j^X8)XI?LMXFKCY!$t zCId4edE@xMEjbr$tq^tS%dv#?1rtl(4o%iZs=lr(US^HcdOF9@_kHKzccf(aFw>;f vxpc6oS__%14~1)sqw*eWN#Z1L<_76KS@E8?e4Q)2sHJ@Ymdk#=v;hDB?`d6f delta 203 zcmV;+05t#70o(zQ8Gl>>kKyYapdhU)y!N7yuAp zf&gJnH|hugu*OjNN++|=L)&v3SX8;CsWV!G@g&xkn3zDMk?uzpFxA>CC6-%3$cszC zyT1))1q;S$=k&iWC1rgp6dRPUrz>PHm|6CAY}Pgs?KN#^T|{(l*Sk8Tl!HIti4yZ+ zHi?^a?P%5XBPF~&R4&bq!h7r}VkdsHGm!MwigVi2v=nNhT}`WquR?jLZ@+DnGxXX4 F00499VTS+! 
diff --git a/test/data/types_ascii.rds b/test/data/types_ascii.rds index e6c2ff545ecfc63952c3c0020436bd47cc981ef2..feb67c68b4bcf1b99eb8712f4932b6321663d118 100644 GIT binary patch literal 198 zcmV;%06G63iwFP!000001AS0i4uc>N{GYp&xFDdPUZ8iNR9h3&2JPec1s}EAq#uM? zW@mPm0vwcUB~p-}EKQ&@4FyUg!3dm%Tf`!sWCRIF1;7<287XsGFu?```m}Qu!Ne)J z@=?0}uXrnwu+H?O;>)HocIZ5~44LH|Ghs5u+Uaxn?#Wuyl>M~CvA|)UOJZ(N!Fzra zXz$Hh)a|eoDO`7{SIS=fZ?dJ;-rQ@Nrj_1o5q|P)2E!^JW{<190hzNlb&3H10KNcQ Aod5s; literal 220 zcmV<203-h&iwFP!000001AS0k4uUWYF54u5L?!VaTmg^a>l~mY1I)mmN3Wr$^9XtY z=6;xgJ=moCzII>Rb-Ne<5MY7;VNEyc2mr9gQ29zHv(H1@a~oJxxuvNyT7&T<)|Qx< zK%|lGM;0*E+AAfNTS3T+OToLp4Q2%k#%bsDzb++ZeJd0jl&_~NWG|Rm_I7O6HWKYM zZD?I|Zr8gyq?Cg{--#0QVK#}IbM0u=^dlv_Jyb5uj>3EFC}JmmvonzN){1l5)3g+7 WqFqg^h_6Ct$EVlO(|H8F063*B5kK5yyR(zs+1+kp zLWoZ&^NCLZ=?YH>pKCsS-?cr~o{8XQnumKfntqdmBFlkiB+`*@i9w!#^iL~B>NZml z%IuuU=C8n{ArO)`u6|U>xoB$zl@7juk0+#0WM7uPTr!c3RDDS*US?W)Soj9e_dNtB zg@G&52Z#0wTAfMkGd$$DQnS{#&xp Date: Tue, 21 Nov 2017 17:28:27 -0800 Subject: [PATCH 10/26] replace readRDS with load interface * requires FileIO rdata_single branch to be pulled https://github.com/jsams/FileIO.jl/tree/rdata_single/ --- src/RData.jl | 21 +++++++++++++++------ test/RDS.jl | 6 +++--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index 4cb5562..f4c3b06 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -95,14 +95,12 @@ end load(s::Stream{format"RData"}; kwoptions...) = load(s, kwoptions) -function readRDS(f::AbstractString; kwoptions...) - io = open(f, "r") + +function load(f::File{format"RDataSingle"}; kwoptions...) + io = open(filename(f), "r") try io = decompress(io) - ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) - @assert ctx.fmtver == 2 # format version - convert2julia = get(ctx.kwdict,:convert,true) - return convert2julia ? 
sexp2julia(readitem(ctx)) : readitem(ctx) + return load(Stream(f, io), kwoptions) catch rethrow() finally @@ -110,4 +108,15 @@ function readRDS(f::AbstractString; kwoptions...) end end +function load(s::Stream{format"RDataSingle"}, kwoptions::Vector{Any}) + io = stream(s) + @assert FileIO.detect_rdata_single(io) + ctx = RDAContext(rdaio(io, chomp(readline(io))), kwoptions) + @assert ctx.fmtver == 2 # format version + convert2julia = get(ctx.kwdict,:convert,true) + return convert2julia ? sexp2julia(readitem(ctx)) : readitem(ctx) +end + +load(s::Stream{format"RDataSingle"}; kwoptions...) = load(s, kwoptions) + end # module diff --git a/test/RDS.jl b/test/RDS.jl index 007434b..54ed2b7 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -12,13 +12,13 @@ module TestRDS chr = ["ab", "c"], factor = pool(["ab", "c"]), cplx = Complex128[1.1+0.5im, 1.0im]) - rdf = sexp2julia(readRDS("$testdir/data/types.rds",convert=false)) + rdf = sexp2julia(load("$testdir/data/types.rds",convert=false)) @test eltypes(rdf) == eltypes(df) @test isequal(rdf, df) - rdf_ascii = sexp2julia(readRDS("$testdir/data/types_ascii.rds",convert=false)) + rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rds",convert=false)) @test eltypes(rdf_ascii) == eltypes(df) @test isequal(rdf_ascii, df) - rdf_decomp = sexp2julia(readRDS("$testdir/data/types_decomp.rds",convert=false)) + rdf_decomp = sexp2julia(load("$testdir/data/types_decomp.rds",convert=false)) @test eltypes(rdf_decomp) == eltypes(df) @test isequal(rdf_decomp, df) end From 8d8c1b7da05771d2341653e509a61ff3f69871b4 Mon Sep 17 00:00:00 2001 From: James Sams Date: Wed, 22 Nov 2017 01:51:15 -0800 Subject: [PATCH 11/26] remove readRDS from export list --- src/RData.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/RData.jl b/src/RData.jl index f4c3b06..8e8d8b2 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -9,8 +9,7 @@ import FileIO: load export sexp2julia, DictoVec, - load, # export FileIO.load() - readRDS + load # export 
FileIO.load() include("config.jl") include("sxtypes.jl") From 0b6d9bfde9c56d3ff92bbfab0a0f414716e69b45 Mon Sep 17 00:00:00 2001 From: James Sams Date: Wed, 22 Nov 2017 14:10:39 -0800 Subject: [PATCH 12/26] add tests for convert=true with load of rds files --- test/RDS.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/RDS.jl b/test/RDS.jl index 54ed2b7..181b1a1 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -21,6 +21,16 @@ module TestRDS rdf_decomp = sexp2julia(load("$testdir/data/types_decomp.rds",convert=false)) @test eltypes(rdf_decomp) == eltypes(df) @test isequal(rdf_decomp, df) + + rdf = load("$testdir/data/types.rds") + @test eltypes(rdf) == eltypes(df) + @test isequal(rdf, df) + rdf_ascii = load("$testdir/data/types_ascii.rds") + @test eltypes(rdf_ascii) == eltypes(df) + @test isequal(rdf_ascii, df) + rdf_decomp = load("$testdir/data/types_decomp.rds") + @test eltypes(rdf_decomp) == eltypes(df) + @test isequal(rdf_decomp, df) end end From 74713ef5d467bc6d0e6366f947a5d5bbbc5a7073 Mon Sep 17 00:00:00 2001 From: James Sams Date: Wed, 22 Nov 2017 14:17:03 -0800 Subject: [PATCH 13/26] mention rds support in news --- NEWS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS.md b/NEWS.md index 71406f8..bcf2050 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +## RData v0.3.0 Release Notes + +##### Changes +* add support for rds files (single object data files from R) [#22], [#33] + +[#22]: https://github.com/JuliaStats/RData.jl/issues/22 +[#33]: https://github.com/JuliaStats/RData.jl/issues/33 + ## RData v0.2.0 Release Notes Updated to Julia v0.6 (older versions not supported). 
From 45478346ee625f111cb6579f37a1b410409005a0 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 23 Nov 2017 09:29:39 -0800 Subject: [PATCH 14/26] add test for isa DataFrame for rds files --- test/RDS.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/RDS.jl b/test/RDS.jl index 181b1a1..01dc425 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -13,22 +13,32 @@ module TestRDS factor = pool(["ab", "c"]), cplx = Complex128[1.1+0.5im, 1.0im]) rdf = sexp2julia(load("$testdir/data/types.rds",convert=false)) + @test rdf isa DataFrame @test eltypes(rdf) == eltypes(df) @test isequal(rdf, df) + rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rds",convert=false)) + @test rdf_ascii isa DataFrame @test eltypes(rdf_ascii) == eltypes(df) @test isequal(rdf_ascii, df) + rdf_decomp = sexp2julia(load("$testdir/data/types_decomp.rds",convert=false)) + @test rdf_decomp isa DataFrame @test eltypes(rdf_decomp) == eltypes(df) @test isequal(rdf_decomp, df) rdf = load("$testdir/data/types.rds") + @test rdf isa DataFrame @test eltypes(rdf) == eltypes(df) @test isequal(rdf, df) + rdf_ascii = load("$testdir/data/types_ascii.rds") + @test rdf_ascii isa DataFrame @test eltypes(rdf_ascii) == eltypes(df) @test isequal(rdf_ascii, df) + rdf_decomp = load("$testdir/data/types_decomp.rds") + @test rdf_decomp isa DataFrame @test eltypes(rdf_decomp) == eltypes(df) @test isequal(rdf_decomp, df) end From 8057bce547d5a04d9def2164fc97e3534c68379d Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 23 Nov 2017 22:28:13 -0800 Subject: [PATCH 15/26] support for R Dates and POSIXct, excluding timezone --- src/convert.jl | 42 +++++++++++++++++++++++++++++++++++++++- test/RDS.jl | 33 +++++++++++++++++++++++++++++++ test/data/dates.rds | Bin 0 -> 138 bytes test/data/datesNA.rds | Bin 0 -> 139 bytes test/data/datetimes.rds | Bin 0 -> 172 bytes test/generate_rda.R | 13 +++++++++++++ 6 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 test/data/dates.rds create mode 100644 
test/data/datesNA.rds create mode 100644 test/data/datetimes.rds diff --git a/src/convert.jl b/src/convert.jl index 217eb1a..7b6b407 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -54,7 +54,11 @@ function sexp2julia(rv::RVEC) # FIXME forceDataArrays option to always convert to DataArray nas = namask(rv) hasna = any(nas) - if hasnames(rv) + if class(rv) == ["Date"] + return date2julia(rv, hasna, nas) + elseif class(rv) == ["POSIXct"; "POSIXt"] + return datetime2julia(rv, hasna, nas) + elseif hasnames(rv) # if data has no NA, convert to simple Vector return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv)) else @@ -87,3 +91,39 @@ function sexp2julia(rl::RList) map(sexp2julia, rl.data) end end + +function date2julia(rv, hasna, nas) + @assert class(rv) == ["Date"] + epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) + if hasna + warn("Date contains NA, not representable in Julia, replacing with 0001-01-01") + dates = [isna ? Date() : Dates.epochdays2date(dtfloat + epoch_conv) + for (isna, dtfloat) in zip(nas, rv.data)] + else + dates = Dates.epochdays2date.(rv.data .+ epoch_conv) + end + if hasnames(rv) + dates = DictoVec(dates, names(rv)) + end + return length(dates) == 1 & !hasnames(rv) ? dates[1] : dates +end + +# does not handle timezone differences because R stores the timezone in the "Z" +# format, but this is ambiguous therefore TimeZones.jl can't convert would be +# nice if there was an option to be more specific about the timezone +# ref: http://timezonesjl.readthedocs.io/en/latest/conversions/ +function datetime2julia(rv, hasna, nas) + @assert class(rv) == ["POSIXct"; "POSIXt"] + if hasna + warn("DateTime contains NA, not representable in Julia, replacing with 0001-01-01T00:00:00") + datetimes = [isna ? 
DateTime() : Dates.unix2datetime(dtfloat) + for (isna, dtfloat) in zip(nas, rv.data)] + else + datetimes = Dates.unix2datetime.(rv.data) + end + if hasnames(rv) + datetimes = DictoVec(datetimes, names(rv)) + end + return length(datetimes) == 1 & !hasnames(rv) ? datetimes[1] : datetimes +end + diff --git a/test/RDS.jl b/test/RDS.jl index 01dc425..9404c54 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -42,5 +42,38 @@ module TestRDS @test eltypes(rdf_decomp) == eltypes(df) @test isequal(rdf_decomp, df) end + + @testset "Test Date conversion" begin + dates = load("$testdir/data/dates.rds") + @test dates[1] == Date("2017-01-01") + Dates.Day.(1:4) + @test dates[2] == Date("2017-01-02") + @test dates[3] isa DictoVec + @test dates[3].data == Date("2017-01-01") + Dates.Day.(1:4) + @test [dates[3].index2name[i] for i in 1:length(dates[3])] == ["A", "B", "C", "D"] + @test dates[4] isa DictoVec + @test dates[4].data == [Date("2017-01-02")] + @test dates[4].index2name[1] == "A" + end + + @testset "Test DateTime conversion" begin + datetimes = load("$testdir/data/datetimes.rds") + @test datetimes[1] == DateTime("2017-01-01T13:23") + Dates.Second.(1:4) + @test datetimes[2] == DateTime("2017-01-01T13:23:01") + @test datetimes[3] isa DictoVec + @test datetimes[3].data == DateTime("2017-01-01T13:23") + Dates.Second.(1:4) + @test [datetimes[3].index2name[i] for i in 1:length(datetimes[3])] == ["A", "B", "C", "D"] + @test datetimes[4] isa DictoVec + @test datetimes[4].data == [DateTime("2017-01-01T13:23:01")] + @test datetimes[4].index2name[1] == "A" + end + + @testset "Test NA Date and DateTime conversion" begin + @test_warn ("Date contains NA, not representable in Julia, replacing with 0001-01-01", + "DateTime contains NA, not representable in Julia, replacing with 0001-01-01T00:00:00") begin + dates = load("$testdir/data/datesNA.rds") + @test dates[1] == [Date("2017-01-01") + Dates.Day.(1:4); Date()] + @test dates[2] == [DateTime("2017-01-01T13:23") + Dates.Second.(1:4); Date()] + 
end + end end diff --git a/test/data/dates.rds b/test/data/dates.rds new file mode 100644 index 0000000000000000000000000000000000000000..43915b2c1a15e69e3a0becbced87a2285d8a04a7 GIT binary patch literal 138 zcmb2|=3oE==I#ec2?+^F32BK*2}x{5k}ZuhS{oZ@^1E2aSTG4CGhE^e*v052&){vq z@YT@3fT^^ADf8NsIY%BPDL6BDPB^N%QAO0_f3wsn4r7r;Ge6ohes!^8nr^v*?~L~u p-&V^BtXd~ta3otClsIS5#+}!0yfB)>q`je^;j4%1x?DyE1^`y=Fl+z- literal 0 HcmV?d00001 diff --git a/test/data/datesNA.rds b/test/data/datesNA.rds new file mode 100644 index 0000000000000000000000000000000000000000..4e454e7c238703d3aed786852713f31f0ecd4ac4 GIT binary patch literal 139 zcmb2|=3oE==I#ec2?+^F32BK*2}x{5k}U^o*wXA2&b}&m$l=k$=wa>fl1-zQ!R2~A z!^Ah;5)uu+BqbyyBP1*we@!YD>h5mjVN`Jxn|@?ch{9j~v>lI4HQTp3Bp)nco?^$a q@Oq-exgYXba>7?^zA^kyIN$L^NMkzh8C}Lm28QVe+U2hR-2ebAU^4dr literal 0 HcmV?d00001 diff --git a/test/data/datetimes.rds b/test/data/datetimes.rds new file mode 100644 index 0000000000000000000000000000000000000000..ee944a4a346bbb2cb38448d35aa04cc2b7136657 GIT binary patch literal 172 zcmV;d08{@TiwFP!000001B>8dU|?WkU}j-rU}6R`8H9nDg@J*Y4@5g&lX6(=z`y{e z8=&+72)z!ZngOVffq@aIgA+)zCg&s;7XujrU_&8->;eA4o)O6Fe-bQW`j6jiB7I5?J5l|emzbLh3o#a`AfqFU;RIti a!x%2O1B4mfKA<^N3@HF|eZv(h0ssJWJ3s3H literal 0 HcmV?d00001 diff --git a/test/generate_rda.R b/test/generate_rda.R index ca393fc..e046493 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -52,3 +52,16 @@ test.cmpfun1 <- cmpfun( test.fun1 ) test.cmpfun2 <- cmpfun( test.fun2 ) save(test.cmpfun0, test.cmpfun1, test.cmpfun2, file = "data/cmpfun.rda") +dates = as.Date("2017-01-01") + 1:4 +datetimes = as.POSIXct("2017-01-01 13:23", tz="GMT") + 1:4 +dateNAs = list(c(dates, NA), c(datetimes, NA)) +saveRDS(dateNAs, file="data/datesNA.rds") +datelst = list(dates, dates[1]) +names(dates) = LETTERS[1:length(dates)] +datelst = c(datelst, list(dates), list(dates[1])) +saveRDS(datelst, file="data/dates.rds") +dtlst = list(datetimes, datetimes[1]) +names(datetimes) = LETTERS[1:length(datetimes)] 
+dtlst = c(dtlst, list(datetimes), list(datetimes[1])) +saveRDS(dtlst, file="data/datetimes.rds") + From 0ceae8e84812ad8bfa54fa7dd22651cf44f9caa5 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 23 Nov 2017 22:54:16 -0800 Subject: [PATCH 16/26] support for NA dates and datetimes --- NEWS.md | 2 ++ src/convert.jl | 12 ++++++------ test/RDS.jl | 16 ++++++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index bcf2050..1ae6cbe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,9 +2,11 @@ ##### Changes * add support for rds files (single object data files from R) [#22], [#33] +* add support for Date and POSIXct, though still lacking timezone handling [#34] [#22]: https://github.com/JuliaStats/RData.jl/issues/22 [#33]: https://github.com/JuliaStats/RData.jl/issues/33 +[#34]: https://github.com/JuliaStats/RData.jl/issues/34 ## RData v0.2.0 Release Notes diff --git a/src/convert.jl b/src/convert.jl index 7b6b407..adb3a38 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -96,9 +96,9 @@ function date2julia(rv, hasna, nas) @assert class(rv) == ["Date"] epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) if hasna - warn("Date contains NA, not representable in Julia, replacing with 0001-01-01") - dates = [isna ? Date() : Dates.epochdays2date(dtfloat + epoch_conv) - for (isna, dtfloat) in zip(nas, rv.data)] + dates = DataArray([isna ? Date() : Dates.epochdays2date(dtfloat + epoch_conv) + for (isna, dtfloat) in zip(nas, rv.data)], + nas) else dates = Dates.epochdays2date.(rv.data .+ epoch_conv) end @@ -115,9 +115,9 @@ end function datetime2julia(rv, hasna, nas) @assert class(rv) == ["POSIXct"; "POSIXt"] if hasna - warn("DateTime contains NA, not representable in Julia, replacing with 0001-01-01T00:00:00") - datetimes = [isna ? DateTime() : Dates.unix2datetime(dtfloat) - for (isna, dtfloat) in zip(nas, rv.data)] + datetimes = DataArray([isna ? 
DateTime() : Dates.unix2datetime(dtfloat) + for (isna, dtfloat) in zip(nas, rv.data)], + nas) else datetimes = Dates.unix2datetime.(rv.data) end diff --git a/test/RDS.jl b/test/RDS.jl index 9404c54..d29268c 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -68,12 +68,16 @@ module TestRDS end @testset "Test NA Date and DateTime conversion" begin - @test_warn ("Date contains NA, not representable in Julia, replacing with 0001-01-01", - "DateTime contains NA, not representable in Julia, replacing with 0001-01-01T00:00:00") begin - dates = load("$testdir/data/datesNA.rds") - @test dates[1] == [Date("2017-01-01") + Dates.Day.(1:4); Date()] - @test dates[2] == [DateTime("2017-01-01T13:23") + Dates.Second.(1:4); Date()] - end + dates = load("$testdir/data/datesNA.rds") + testdates = RData.DataArray([Date("2017-01-01") + Dates.Day.(1:4); Date()], + BitArray([false, false, false, false, true])) + @test dates[1][1:4] == testdates[1:4] + @test RData.isna(dates[1][5]) + + testdts = RData.DataArray([DateTime("2017-01-01T13:23") + Dates.Second.(1:4); Date()], + BitArray([false, false, false, false, true])) + @test dates[2][1:4] == testdts[1:4] + @test RData.isna(dates[2][5]) end end From 4c032bacce009ed5ae99090791079902861e7d39 Mon Sep 17 00:00:00 2001 From: James Sams Date: Sat, 25 Nov 2017 06:07:49 -0800 Subject: [PATCH 17/26] use constants for referring to R's date and datetime classes --- src/config.jl | 4 ++++ src/convert.jl | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/config.jl b/src/config.jl index 9300576..bd5740e 100644 --- a/src/config.jl +++ b/src/config.jl @@ -37,3 +37,7 @@ const Hash = Dict{RString, Any} const emptyhash = Hash() const emptyhashkey = RString("\0") + +const R_Date_Class = ["Date"] +const R_POSIXct_Class = ["POSIXct", "POSIXt"] + diff --git a/src/convert.jl b/src/convert.jl index adb3a38..dfa87bc 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -54,9 +54,9 @@ function sexp2julia(rv::RVEC) # FIXME forceDataArrays option 
to always convert to DataArray nas = namask(rv) hasna = any(nas) - if class(rv) == ["Date"] + if class(rv) == R_Date_Class return date2julia(rv, hasna, nas) - elseif class(rv) == ["POSIXct"; "POSIXt"] + elseif class(rv) == R_POSIXct_Class return datetime2julia(rv, hasna, nas) elseif hasnames(rv) # if data has no NA, convert to simple Vector @@ -93,7 +93,7 @@ function sexp2julia(rl::RList) end function date2julia(rv, hasna, nas) - @assert class(rv) == ["Date"] + @assert class(rv) == R_Date_Class epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) if hasna dates = DataArray([isna ? Date() : Dates.epochdays2date(dtfloat + epoch_conv) @@ -113,7 +113,7 @@ end # nice if there was an option to be more specific about the timezone # ref: http://timezonesjl.readthedocs.io/en/latest/conversions/ function datetime2julia(rv, hasna, nas) - @assert class(rv) == ["POSIXct"; "POSIXt"] + @assert class(rv) == R_POSIXct_Class if hasna datetimes = DataArray([isna ? DateTime() : Dates.unix2datetime(dtfloat) for (isna, dtfloat) in zip(nas, rv.data)], From cb877c376530715f37ea49482663d10b9353c70e Mon Sep 17 00:00:00 2001 From: James Sams Date: Sun, 26 Nov 2017 16:14:28 -0800 Subject: [PATCH 18/26] use TimeZones to support R's POSIXct --- REQUIRE | 1 + src/RData.jl | 2 +- src/convert.jl | 29 +++++++++++++++++++++++------ test/RDS.jl | 35 ++++++++++++++++++++++++++--------- test/data/datetimes_tz.rds | Bin 0 -> 145 bytes test/generate_rda.R | 9 +++++++++ 6 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 test/data/datetimes_tz.rds diff --git a/REQUIRE b/REQUIRE index 29030fa..abb3044 100644 --- a/REQUIRE +++ b/REQUIRE @@ -3,3 +3,4 @@ DataFrames 0.9 DataArrays 0.4 FileIO 0.1.2 CodecZlib 0.4 +TimeZones diff --git a/src/RData.jl b/src/RData.jl index 8e8d8b2..c64f3b7 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -2,7 +2,7 @@ __precompile__() module RData -using DataFrames, DataArrays, CodecZlib, FileIO +using DataFrames, DataArrays, CodecZlib, FileIO, TimeZones 
import DataFrames: identifier import FileIO: load diff --git a/src/convert.jl b/src/convert.jl index dfa87bc..e692882 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -1,6 +1,8 @@ # converters from selected RSEXPREC to Hash # They are used to translate SEXPREC attributes into Hash +import TimeZones: unix2zdt + function Base.convert(::Type{Hash}, pl::RPairList) res = Hash() for i in 1:length(pl.items) @@ -108,18 +110,33 @@ function date2julia(rv, hasna, nas) return length(dates) == 1 & !hasnames(rv) ? dates[1] : dates end -# does not handle timezone differences because R stores the timezone in the "Z" -# format, but this is ambiguous therefore TimeZones.jl can't convert would be -# nice if there was an option to be more specific about the timezone -# ref: http://timezonesjl.readthedocs.io/en/latest/conversions/ +# return tuple is true/false status of whether tzattr was successfully interpreted +# then the tz itself. when not successfully interpreted, tz is always localzone() +function gettz(tzattr) + try + return true, TimeZone(tzattr) + catch ArgumentError + warn("Could not determine timezone of '$(tzattr)', treating as if UTC.") + return false, tz"UTC" + end +end + +function unix2zdt(seconds::Real; tz::TimeZone=tz"UTC") + ZonedDateTime(Dates.unix2datetime(seconds), tz, from_utc=true) +end + function datetime2julia(rv, hasna, nas) @assert class(rv) == R_POSIXct_Class + tzattr = getattr(rv, "tzone", ["UTC"])[1] + tzattr = tzattr == "" ? "UTC" : tzattr # R will store a blank for tzone + goodtz, tz = gettz(tzattr) if hasna - datetimes = DataArray([isna ? DateTime() : Dates.unix2datetime(dtfloat) + nadt = ZonedDateTime(DateTime(), tz) + datetimes = DataArray([isna ? 
nadt : unix2zdt(dtfloat, tz=tz) for (isna, dtfloat) in zip(nas, rv.data)], nas) else - datetimes = Dates.unix2datetime.(rv.data) + datetimes = unix2zdt.(rv.data, tz=tz) end if hasnames(rv) datetimes = DictoVec(datetimes, names(rv)) diff --git a/test/RDS.jl b/test/RDS.jl index d29268c..a68df28 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -1,7 +1,9 @@ module TestRDS using Base.Test + using DataArrays using DataFrames using RData + using TimeZones testdir = dirname(@__FILE__) @@ -57,27 +59,42 @@ module TestRDS @testset "Test DateTime conversion" begin datetimes = load("$testdir/data/datetimes.rds") - @test datetimes[1] == DateTime("2017-01-01T13:23") + Dates.Second.(1:4) - @test datetimes[2] == DateTime("2017-01-01T13:23:01") + testdts = ZonedDateTime.(DateTime("2017-01-01T13:23") + Dates.Second.(1:4), + TimeZone("UTC")) + @test datetimes[1] == testdts + @test datetimes[2] == testdts[1] @test datetimes[3] isa DictoVec - @test datetimes[3].data == DateTime("2017-01-01T13:23") + Dates.Second.(1:4) + @test datetimes[3].data == testdts @test [datetimes[3].index2name[i] for i in 1:length(datetimes[3])] == ["A", "B", "C", "D"] @test datetimes[4] isa DictoVec - @test datetimes[4].data == [DateTime("2017-01-01T13:23:01")] + @test datetimes[4].data == [testdts[1]] @test datetimes[4].index2name[1] == "A" end @testset "Test NA Date and DateTime conversion" begin dates = load("$testdir/data/datesNA.rds") - testdates = RData.DataArray([Date("2017-01-01") + Dates.Day.(1:4); Date()], + + testdates = DataArray([Date("2017-01-01") + Dates.Day.(1:4); Date()], BitArray([false, false, false, false, true])) @test dates[1][1:4] == testdates[1:4] - @test RData.isna(dates[1][5]) + @test isna(dates[1][5]) - testdts = RData.DataArray([DateTime("2017-01-01T13:23") + Dates.Second.(1:4); Date()], - BitArray([false, false, false, false, true])) + testdts = DataArray([ZonedDateTime.(DateTime("2017-01-01T13:23") + Dates.Second.(1:4), + tz"UTC"); ZonedDateTime(tz"UTC")], + BitArray([false, false, 
false, false, true])) @test dates[2][1:4] == testdts[1:4] - @test RData.isna(dates[2][5]) + @test isna(dates[2][5]) + end + + @testset "Test DateTime timezones" begin + # when this warning can go away, uncomment test_broken below, should work now + datetimes = @test_warn "Could not determine timezone of 'CST', treating as if UTC." begin + load("$testdir/data/datetimes_tz.rds") + end + # assumes generate_rda.R was generated on system set to PST! + @test datetimes[1] == ZonedDateTime(DateTime("2017-01-01T21:23"), tz"UTC") + #@test_broken datetimes[2] == ZonedDateTime(DateTime("2017-01-01T13:23"), tz"CST") + @test datetimes[3] == ZonedDateTime(DateTime("2017-01-01T13:23"), tz"America/Chicago") end end diff --git a/test/data/datetimes_tz.rds b/test/data/datetimes_tz.rds new file mode 100644 index 0000000000000000000000000000000000000000..9a9fc416a8e0b6185b4cd27e038f133b94381cd7 GIT binary patch literal 145 zcmV;C0B-*uiwFP!000001B>8dU|?WkU}j-rU}6R`8H9lt$YACJ(u|JRq+-s3L>Ykc zKt2Nt11FGXP0mRyE(S6Lz?va~>;eA4o)O6Ck&IXDE(?9iiBGMnEqH?=4;IZ@v^14yRlgX{qSeTo1jO#uJ^GbA@J literal 0 HcmV?d00001 diff --git a/test/generate_rda.R b/test/generate_rda.R index e046493..f7acc29 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -65,3 +65,12 @@ names(datetimes) = LETTERS[1:length(datetimes)] dtlst = c(dtlst, list(datetimes), list(datetimes[1])) saveRDS(dtlst, file="data/datetimes.rds") +# the first element here is assumed to be in the local timezone but is saved in +# UTC time, without any timezone attribute. When R reads it, it assumes local time. +# So the test associated with this first datapoint is going to assume which timezone +# the data is generated in! 
(PST/-8) +saveRDS(list(as.POSIXct("2017-01-01 13:23"), + as.POSIXct("2017-01-01 13:23", tz="CST"), + as.POSIXct("2017-01-01 13:23", tz="America/Chicago")), + file="data/datetimes_tz.rds") + From e35dc7ee0fff06131e1214dcbc46325cca87abca Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 15 Mar 2018 19:27:23 -0700 Subject: [PATCH 19/26] Bring in line with requests on PR #35 * refactor to use jlvec * replace try block with key lookup * remove DataArray dependency * factor date conversion to rdays2date * add test for date and datetimes in data frames, including single row --- src/convert.jl | 66 +++++++++++++++++++++------------------- test/RDS.jl | 29 ++++++++++++------ test/data/datedfs.rds | Bin 0 -> 243 bytes test/data/dates.rds | Bin 138 -> 138 bytes test/data/datesNA.rds | Bin 139 -> 139 bytes test/data/datetimes.rds | Bin 172 -> 172 bytes test/generate_rda.R | 5 ++- 7 files changed, 58 insertions(+), 42 deletions(-) create mode 100644 test/data/datedfs.rds diff --git a/src/convert.jl b/src/convert.jl index a931e54..5d6f07c 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -1,8 +1,7 @@ # converters from selected RSEXPREC to Hash # They are used to translate SEXPREC attributes into Hash -import TimeZones: unix2zdt -import DataArrays: @data +import TimeZones: unix2zdt, ZonedDateTime function Base.convert(::Type{Hash}, pl::RPairList) res = Hash() @@ -54,7 +53,16 @@ function jlvec(::Type{T}, rv::RNullableVector{R}, force_missing::Bool=true) wher end # convert R vector into Vector of appropriate type -jlvec(rv::RVEC, force_missing::Bool=true) = jlvec(eltype(rv.data), rv, force_missing) +function jlvec(rv::RVEC, force_missing::Bool=true) + cls = class(rv) + if cls == R_Date_Class + return jlvec(Dates.Date, rv, force_missing) + elseif cls == R_POSIXct_Class + return jlvec(ZonedDateTime, rv, force_missing) + else + return jlvec(eltype(rv.data), rv, force_missing) + end +end # convert R logical vector (uses Int32 to store values) into Vector{Bool[?]} function 
jlvec(rl::RLogicalVector, force_missing::Bool=true) @@ -101,11 +109,7 @@ function sexp2julia(rv::RVEC) # TODO dimnames? # FIXME add force_missing option to control whether always convert to Union{T, Missing} jv = jlvec(rv, false) - if class(rv) == R_Date_Class - return date2julia(rv) - elseif class(rv) == R_POSIXct_Class - return datetime2julia(rv) - elseif hasnames(rv) + if hasnames(rv) return DictoVec(jv, names(rv)) else hasdims = hasdim(rv) @@ -135,30 +139,33 @@ function sexp2julia(rl::RList) end end -function date2julia(rv) - @assert class(rv) == R_Date_Class +function rdays2date(days::Real) epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) + Dates.epochdays2date(days + epoch_conv) +end + + +function jlvec(::Type{Dates.Date}, rv::RVEC, force_missing::Bool=true) + @assert class(rv) == R_Date_Class nas = isnan.(rv.data) - if any(nas) - dates = @data([isna ? missing : Dates.epochdays2date(dtfloat + epoch_conv) - for (isna, dtfloat) in zip(nas, rv.data)]) + if force_missing || any(nas) + dates = Union{Dates.Date, Missing}[isna ? missing : rdays2date(dtfloat) + for (isna, dtfloat) in zip(nas, rv.data)] else - dates = Dates.epochdays2date.(rv.data .+ epoch_conv) - end - if hasnames(rv) - dates = DictoVec(dates, names(rv)) + dates = rdays2date.(rv.data) end - return length(dates) == 1 & !hasnames(rv) ? dates[1] : dates + return dates end # return tuple is true/false status of whether tzattr was successfully interpreted # then the tz itself. 
when not successfully interpreted, tz defaults to UTC -function gettz(tzattr) - try - return true, TimeZone(tzattr) - catch ArgumentError +function r2juliatz(tzattr) + valid = haskey(TimeZones.TIME_ZONES, tzattr) + if !valid warn("Could not determine timezone of '$(tzattr)', treating as if UTC.") return false, tz"UTC" + else + return true, TimeZone(tzattr) end end @@ -166,21 +173,18 @@ function unix2zdt(seconds::Real; tz::TimeZone=tz"UTC") ZonedDateTime(Dates.unix2datetime(seconds), tz, from_utc=true) end -function datetime2julia(rv) +function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) @assert class(rv) == R_POSIXct_Class tzattr = getattr(rv, "tzone", ["UTC"])[1] tzattr = tzattr == "" ? "UTC" : tzattr # R will store a blank for tzone - goodtz, tz = gettz(tzattr) + goodtz, tz = r2juliatz(tzattr) nas = isnan.(rv.data) - if any(nas) - datetimes = @data([isna ? missing : unix2zdt(dtfloat, tz=tz) - for (isna, dtfloat) in zip(nas, rv.data)]) + if force_missing || any(nas) + datetimes = Union{ZonedDateTime, Missing}[isna ? missing : unix2zdt(dtfloat, tz=tz) + for (isna, dtfloat) in zip(nas, rv.data)] else datetimes = unix2zdt.(rv.data, tz=tz) end - if hasnames(rv) - datetimes = DictoVec(datetimes, names(rv)) - end - return length(datetimes) == 1 & !hasnames(rv) ? 
datetimes[1] : datetimes + return datetimes end diff --git a/test/RDS.jl b/test/RDS.jl index 52e7a61..89fb442 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -1,6 +1,5 @@ module TestRDS using Base.Test - using DataArrays using DataFrames using RData using TimeZones @@ -71,19 +70,29 @@ module TestRDS @test datetimes[4].index2name[1] == "A" end + @testset "Test Date and DateTime in a DataFrame" begin + rdfs = load("$testdir/data/datedfs.rds") + df = DataFrame(date=Date("2017-01-01") + Dates.Day.(1:4), + datetime=ZonedDateTime.(DateTime("2017-01-01T13:23") + Dates.Second.(1:4), + tz"UTC")) + @test length(rdfs) == 2 + @test rdfs[1] isa DataFrame + @test rdfs[2] isa DataFrame + @test eltypes(df) == eltypes(rdfs[1]) + @test eltypes(df) == eltypes(rdfs[2]) + @test isequal(df[1, :], rdfs[1]) + @test isequal(df, rdfs[2]) + end + @testset "Test NA Date and DateTime conversion" begin dates = load("$testdir/data/datesNA.rds") - testdates = DataArray([Date("2017-01-01") + Dates.Day.(1:4); Date()], - BitArray([false, false, false, false, true])) - @test dates[1][1:4] == testdates[1:4] - @test ismissing(dates[1][5]) + testdates = [Date("2017-01-01") + Dates.Day.(1:4); missing] + @test all(dates[1] .=== testdates) - testdts = DataArray([ZonedDateTime.(DateTime("2017-01-01T13:23") + Dates.Second.(1:4), - tz"UTC"); ZonedDateTime(tz"UTC")], - BitArray([false, false, false, false, true])) - @test dates[2][1:4] == testdts[1:4] - @test ismissing(dates[2][5]) + testdts = [ZonedDateTime.(DateTime("2017-01-01T13:23") + Dates.Second.(1:4), tz"UTC"); + missing] + @test all(dates[2] .=== testdts) end @testset "Test DateTime timezones" begin diff --git a/test/data/datedfs.rds b/test/data/datedfs.rds new file mode 100644 index 0000000000000000000000000000000000000000..f41707dcd0fd782a3bbd76d88330b2c3a31643eb GIT binary patch literal 243 zcmVhiwFP!000001B>8dU|?WkU}j-vU}6R`8HB-n24)z|2c#JtE*xQi02U?? 
zpMiye6G*cr=Oh*v0~rEHf-EkHC8V@;5&##WyO@^(006N(Vrl>Y literal 0 HcmV?d00001 diff --git a/test/data/dates.rds b/test/data/dates.rds index 43915b2c1a15e69e3a0becbced87a2285d8a04a7..1cb935f38a8472ad75aa22df23892f2c47cd158a 100644 GIT binary patch delta 98 zcmV-o0GNb2p|cvxFnXO0_lHHLm6R) zLNqh}C!n54N8mCC)q#15xv4M*vOonH9bpV77{eLHaKY_xCUgsd<`D1^054tQ*KGj+ E0I5SGw*UYD delta 98 zcmV-o0G3qX0QE>ku5AGT E01x6M-v9sr diff --git a/test/data/datesNA.rds b/test/data/datesNA.rds index 4e454e7c238703d3aed786852713f31f0ecd4ac4..1ee51e037a5857fe29849471c10438320b3620fc 100644 GIT binary patch delta 27 icmeBX>}H%G!k9KuRFyfRp=zS78;6RKuGbX?1_l6WHwOU# delta 27 jcmeBX>}H%G!k9EsRF%2mm*hlSH;(BC+U2h>FfafBb-f5~ diff --git a/test/data/datetimes.rds b/test/data/datetimes.rds index ee944a4a346bbb2cb38448d35aa04cc2b7136657..062bca3442dd235c64c27e49e61e29a8a42ba837 100644 GIT binary patch delta 130 zcmV-|0Db?g0jvR#6ai+D6(M$kg$YP70(Ec#Y1ZVN#NuKgLjY_jM36neKiD%Oxdh5( z19MBzb(U1+=cPh*!UUN^L!5!sKd5UMVXlGbW&BTqMNI$kn@yxosbMFozw;7vQ(+;- k0u^L*gfX083}+a_1$TfjquU2Ghl(Ku0Gn6;|0)6i0N5cmD*ylh delta 130 zcmV-|0Db?g0jvR#6aiw96(M#3sE>hx5vYR`NV6vABo-F~83JHKA%g4y{=uFR$t6%O z8<<;yuCt^nKQ9%k6DG*)?i&K6{y|;C2y+cYFXMj_EMoeP-)tg%N)0rVYF1Q1P8QnghIaCZO0CIi96)FM%0C_kt5dZ)H diff --git a/test/generate_rda.R b/test/generate_rda.R index 95adb6d..a354c3a 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -57,7 +57,7 @@ y <- ordered(x, levels=c("b", "a", "c")) save(x, y, file="data/ord.rda") dates = as.Date("2017-01-01") + 1:4 -datetimes = as.POSIXct("2017-01-01 13:23", tz="GMT") + 1:4 +datetimes = as.POSIXct("2017-01-01 13:23", tz="UTC") + 1:4 dateNAs = list(c(dates, NA), c(datetimes, NA)) saveRDS(dateNAs, file="data/datesNA.rds") datelst = list(dates, dates[1]) @@ -68,6 +68,9 @@ dtlst = list(datetimes, datetimes[1]) names(datetimes) = LETTERS[1:length(datetimes)] dtlst = c(dtlst, list(datetimes), list(datetimes[1])) saveRDS(dtlst, file="data/datetimes.rds") +datedfs = list(data.frame(date=dates[1], 
datetime=datetimes[1]), + data.frame(date=dates, datetime=datetimes)) +saveRDS(datedfs, file="data/datedfs.rds") # the first element here is assumed to be in the local timezone but is saved in # UTC time, without any timezone attribute. When R reads it, it assumes local time. From 342dfa54c8f78baab23942bcee10d51580d85eb5 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 15 Mar 2018 19:33:33 -0700 Subject: [PATCH 20/26] move jlvec date/time functions to be with others --- src/convert.jl | 57 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/convert.jl b/src/convert.jl index 5d6f07c..d55b48a 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -100,6 +100,35 @@ function jlvec(ri::RIntegerVector, force_missing::Bool=true) end end +# convert to Date +function jlvec(::Type{Dates.Date}, rv::RVEC, force_missing::Bool=true) + @assert class(rv) == R_Date_Class + nas = isnan.(rv.data) + if force_missing || any(nas) + dates = Union{Dates.Date, Missing}[isna ? missing : rdays2date(dtfloat) + for (isna, dtfloat) in zip(nas, rv.data)] + else + dates = rdays2date.(rv.data) + end + return dates +end + +# convert to ZonedDateTime +function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) + @assert class(rv) == R_POSIXct_Class + tzattr = getattr(rv, "tzone", ["UTC"])[1] + tzattr = tzattr == "" ? "UTC" : tzattr # R will store a blank for tzone + goodtz, tz = r2juliatz(tzattr) + nas = isnan.(rv.data) + if force_missing || any(nas) + datetimes = Union{ZonedDateTime, Missing}[isna ? 
missing : unix2zdt(dtfloat, tz=tz) + for (isna, dtfloat) in zip(nas, rv.data)] + else + datetimes = unix2zdt.(rv.data, tz=tz) + end + return datetimes +end + function sexp2julia(rex::RSEXPREC) warn("Conversion of $(typeof(rex)) to Julia is not implemented") return nothing @@ -144,19 +173,6 @@ function rdays2date(days::Real) Dates.epochdays2date(days + epoch_conv) end - -function jlvec(::Type{Dates.Date}, rv::RVEC, force_missing::Bool=true) - @assert class(rv) == R_Date_Class - nas = isnan.(rv.data) - if force_missing || any(nas) - dates = Union{Dates.Date, Missing}[isna ? missing : rdays2date(dtfloat) - for (isna, dtfloat) in zip(nas, rv.data)] - else - dates = rdays2date.(rv.data) - end - return dates -end - # return tuple is true/false status of whether tzattr was successfully interpreted # then the tz itself. when not successfully interpreted, tz defaults to UTC function r2juliatz(tzattr) @@ -173,18 +189,3 @@ function unix2zdt(seconds::Real; tz::TimeZone=tz"UTC") ZonedDateTime(Dates.unix2datetime(seconds), tz, from_utc=true) end -function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) - @assert class(rv) == R_POSIXct_Class - tzattr = getattr(rv, "tzone", ["UTC"])[1] - tzattr = tzattr == "" ? "UTC" : tzattr # R will store a blank for tzone - goodtz, tz = r2juliatz(tzattr) - nas = isnan.(rv.data) - if force_missing || any(nas) - datetimes = Union{ZonedDateTime, Missing}[isna ? 
missing : unix2zdt(dtfloat, tz=tz) - for (isna, dtfloat) in zip(nas, rv.data)] - else - datetimes = unix2zdt.(rv.data, tz=tz) - end - return datetimes -end - From c1bd8db0abc82d1db3f972a1f14c54c561f13736 Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 15 Mar 2018 20:01:00 -0700 Subject: [PATCH 21/26] more reliable lookup of timezone --- src/convert.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/convert.jl b/src/convert.jl index d55b48a..5a6e1b1 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -1,7 +1,7 @@ # converters from selected RSEXPREC to Hash # They are used to translate SEXPREC attributes into Hash -import TimeZones: unix2zdt, ZonedDateTime +import TimeZones: istimezone, unix2zdt, ZonedDateTime function Base.convert(::Type{Hash}, pl::RPairList) res = Hash() @@ -176,7 +176,7 @@ end # return tuple is true/false status of whether tzattr was successfully interpreted # then the tz itself. when not successfully interpreted, tz defaults to UTC function r2juliatz(tzattr) - valid = haskey(TimeZones.TIME_ZONES, tzattr) + valid = istimezone(tzattr) if !valid warn("Could not determine timezone of '$(tzattr)', treating as if UTC.") return false, tz"UTC" From f009ad4514021c5dd4c3ecdcc1fd7f8bbf62c78c Mon Sep 17 00:00:00 2001 From: James Sams Date: Thu, 15 Mar 2018 22:05:49 -0700 Subject: [PATCH 22/26] more refactoring of r2juliatz, added back a deleted comment --- src/convert.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/convert.jl b/src/convert.jl index 5a6e1b1..e4d4e6b 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -116,9 +116,7 @@ end # convert to ZonedDateTime function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) @assert class(rv) == R_POSIXct_Class - tzattr = getattr(rv, "tzone", ["UTC"])[1] - tzattr = tzattr == "" ? 
"UTC" : tzattr # R will store a blank for tzone - goodtz, tz = r2juliatz(tzattr) + goodtz, tz = r2juliatz(rv) nas = isnan.(rv.data) if force_missing || any(nas) datetimes = Union{ZonedDateTime, Missing}[isna ? missing : unix2zdt(dtfloat, tz=tz) @@ -139,6 +137,7 @@ function sexp2julia(rv::RVEC) # FIXME add force_missing option to control whether always convert to Union{T, Missing} jv = jlvec(rv, false) if hasnames(rv) + # if data has no NA, convert to simple Vector return DictoVec(jv, names(rv)) else hasdims = hasdim(rv) @@ -175,7 +174,13 @@ end # return tuple is true/false status of whether tzattr was successfully interpreted # then the tz itself. when not successfully interpreted, tz defaults to UTC -function r2juliatz(tzattr) +function r2juliatz(rv::RVEC) + tzattr = getattr(rv, "tzone", ["UTC"])[1] + tzattr = tzattr == "" ? "UTC" : tzattr # R will store a blank for tzone + return r2juliatz(tzattr) +end + +function r2juliatz(tzattr::AbstractString) valid = istimezone(tzattr) if !valid warn("Could not determine timezone of '$(tzattr)', treating as if UTC.") From 982520d1558f9c3ae9769ff5e7d2ec772e2ebd2d Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 27 Mar 2018 15:20:10 +0000 Subject: [PATCH 23/26] update news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f1bb097..d6f58a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ##### Changes * add support for `.rds` files (single object data files from R) [#22], [#33] -* add support for `Date` and `POSIXct`, though still lacking complete timezone handling [#34] +* add support for `Date` and `POSIXct` (only for timezone codes supported by [TimeZones](https://github.com/JuliaTime/TimeZones.jl)) data [#34] [#22]: https://github.com/JuliaStats/RData.jl/issues/22 [#33]: https://github.com/JuliaStats/RData.jl/issues/33 From a8da9b357eae8d003921c8f3cb2bb5f686c54d05 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 27 Mar 2018 15:20:23 +0000 Subject: 
[PATCH 24/26] update conversion table --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7c29819..3a48fe6 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,8 @@ convert R objects into Julia equivalents: | named vector, list | `DictoVec` | `DictoVec` allows indexing both by element index and by its name, just as R vectors and lists | | vector | `Vector{T}` | `T` is the appropriate Julia type. If R vector contains `NA` values, they are converted to [`missing`](https://github.com/JuliaData/Missings.jl), and the elements type of the resulting `Vector` is `Union{T, Missing}`. | factor | `CategoricalArray` | [CategoricalArrays.jl](https://github.com/JuliaData/CategoricalArrays.jl) | +| `Date` | `Dates.Date` | | +| `POSIXct` date time | `ZonedDateTime` | [TimeZones.jl](https://github.com/JuliaTime/TimeZones.jl) | | data frame | `DataFrame` | [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) | If conversion to the Julia type is not supported (e.g. R closure or language expression), `load()` will return the internal RData representation of the object (`RSEXPREC` subtype). 
From 676cc88d41a236d48ac2823f0e89cdbdc5334a3e Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 27 Mar 2018 15:22:12 +0000 Subject: [PATCH 25/26] refactor timezone handling allow specifying fallback (default) timezone --- src/convert.jl | 38 ++++++++++++++++++++++---------------- test/RDS.jl | 11 +++++------ 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/convert.jl b/src/convert.jl index e4d4e6b..1558008 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -100,7 +100,7 @@ function jlvec(ri::RIntegerVector, force_missing::Bool=true) end end -# convert to Date +# convert R Date to Dates.Date function jlvec(::Type{Dates.Date}, rv::RVEC, force_missing::Bool=true) @assert class(rv) == R_Date_Class nas = isnan.(rv.data) @@ -113,10 +113,10 @@ function jlvec(::Type{Dates.Date}, rv::RVEC, force_missing::Bool=true) return dates end -# convert to ZonedDateTime +# convert R POSIXct to ZonedDateTime function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) @assert class(rv) == R_POSIXct_Class - goodtz, tz = r2juliatz(rv) + tz, validtz = getjuliatz(rv) nas = isnan.(rv.data) if force_missing || any(nas) datetimes = Union{ZonedDateTime, Missing}[isna ? missing : unix2zdt(dtfloat, tz=tz) @@ -168,29 +168,35 @@ function sexp2julia(rl::RList) end function rdays2date(days::Real) - epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) + const epoch_conv = 719528 # Dates.date2epochdays(Date("1970-01-01")) Dates.epochdays2date(days + epoch_conv) end -# return tuple is true/false status of whether tzattr was successfully interpreted -# then the tz itself. when not successfully interpreted, tz defaults to UTC -function r2juliatz(rv::RVEC) - tzattr = getattr(rv, "tzone", ["UTC"])[1] - tzattr = tzattr == "" ? 
"UTC" : tzattr # R will store a blank for tzone - return r2juliatz(tzattr) +# gets R timezone from the data attribute and converts it to TimeZones.TimeZone +# see r2juliatz() +function getjuliatz(rv::RVEC, deftz=tz"UTC") + tzattr = getattr(rv, "tzone", [""])[1] + if tzattr == "" + return deftz, true # R will store a blank for tzone + else + return r2juliatz(tzattr, deftz) + end end -function r2juliatz(tzattr::AbstractString) - valid = istimezone(tzattr) +# converts R timezone code to TimeZones.TimeZone +# returns a tuple: +# - timezone (or `deftz` if `rtz` is not recognized as a valid time zone) +# - boolean flag: true if `rtz` is recognized as a valid time zone, false otherwise +function r2juliatz(rtz::AbstractString, deftz=tz"UTC") + valid = istimezone(rtz) if !valid - warn("Could not determine timezone of '$(tzattr)', treating as if UTC.") - return false, tz"UTC" + warn("Could not determine the timezone of '$(rtz)', treating as $deftz.") + return deftz, false else - return true, TimeZone(tzattr) + return TimeZone(rtz), true end end function unix2zdt(seconds::Real; tz::TimeZone=tz"UTC") ZonedDateTime(Dates.unix2datetime(seconds), tz, from_utc=true) end - diff --git a/test/RDS.jl b/test/RDS.jl index 89fb442..c5db53d 100644 --- a/test/RDS.jl +++ b/test/RDS.jl @@ -96,16 +96,15 @@ module TestRDS end @testset "Test DateTime timezones" begin - # when this warning can go away, uncomment test_broken below, should work now - datetimes = @test_warn "Could not determine timezone of 'CST', treating as if UTC." begin + # tz"CST" is not supported by TimeZones.jl + datetimes = @test_warn "Could not determine the timezone of 'CST', treating as UTC." begin load("$testdir/data/datetimes_tz.rds") end # assumes generate_rda.R was generated on system set to PST! @test datetimes[1] == ZonedDateTime(DateTime("2017-01-01T21:23"), tz"UTC") - # tz"CST" is invalid, but if TimeZones ever enables support for these 3 - # letter codes, a test would be useful.
For now, intentionally not testing - #@test_broken datetimes[2] == ZonedDateTime(DateTime("2017-01-01T13:23"), tz"CST") + # should be tz"CST", but gets substituted to tz"UTC" + # FIXME update the test when CST is supported + @test datetimes[2] == ZonedDateTime(DateTime("2017-01-01T13:23"), tz"UTC") @test datetimes[3] == ZonedDateTime(DateTime("2017-01-01T13:23"), tz"America/Chicago") end end - From e86ee6d365a2723d777c6ab8cc1df524df9ecbaf Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 27 Mar 2018 23:25:35 +0200 Subject: [PATCH 26/26] mention this PR in the news --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index d6f58a7..b753f8b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,11 +2,12 @@ ##### Changes * add support for `.rds` files (single object data files from R) [#22], [#33] -* add support for `Date` and `POSIXct` (only for timezone codes supported by [TimeZones](https://github.com/JuliaTime/TimeZones.jl)) data [#34] +* add support for `Date` and `POSIXct` (only for timezone codes supported by [TimeZones](https://github.com/JuliaTime/TimeZones.jl)) data [#34], [#35] [#22]: https://github.com/JuliaStats/RData.jl/issues/22 [#33]: https://github.com/JuliaStats/RData.jl/issues/33 [#34]: https://github.com/JuliaStats/RData.jl/issues/34 +[#35]: https://github.com/JuliaStats/RData.jl/issues/35 ## RData v0.3.0 Release Notes