From b0048e1b2e1500c53262623380e4c26ff27d6fe3 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 9 Mar 2017 17:34:27 +0100 Subject: [PATCH 1/7] remove ctor @compat for v0.4 --- src/io/XDRIO.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/XDRIO.jl b/src/io/XDRIO.jl index 48ba09a..29a959e 100644 --- a/src/io/XDRIO.jl +++ b/src/io/XDRIO.jl @@ -4,7 +4,7 @@ type XDRIO{T<:IO} <: RDAIO sub::T # underlying IO stream buf::Vector{UInt8} # buffer for strings - @compat (::Type{XDRIO}){T <: IO}(io::T) = new{T}(io, Vector{UInt8}(1024)) + (::Type{XDRIO}){T <: IO}(io::T) = new{T}(io, Vector{UInt8}(1024)) end readint32(io::XDRIO) = ntoh(read(io.sub, Int32)) From be72ad0e1108d8457f72843c0bfaa6227f0c1f95 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jul 2017 19:41:02 +0000 Subject: [PATCH 2/7] switch to DataTables * drop Julia 0.4 support (since DataTables require Julia 0.5) * convert from using DataArrays to NullableArrays/CategoricalArrays --- NEWS.md | 11 ++++++++ REQUIRE | 3 +-- appveyor.yml | 2 ++ src/RData.jl | 5 ++-- src/convert.jl | 69 +++++++++++++++++++++++++++++--------------------- src/sxtypes.jl | 2 +- test/RDA.jl | 14 +++++----- 7 files changed, 64 insertions(+), 42 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8dcd3e7..edfc4f7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +## RData v0.1.0 Release Notes + +Switched from `DataFrames` to `DataTables`, dropped Julia v0.4 support + +##### Changes +* using `NullableArrays.jl` and `CategoricalArrays.jl` +instead of `DataArrays.jl` ([#19], see [JuliaStats/DataFrames.jl#1008]) +* Julia v0.4 not supported (`DataTables.jl` requires v0.5) + ## RData v0.0.4 Release Notes Now the recommended way to load `.RData`/`.rda` files is by `FileIO.load()`. @@ -15,5 +24,7 @@ Initial release based on `DataFrames.read_rda()` ([JuliaStats/DataFrames.jl#1031 [#9]: https://github.com/JuliaStats/RData.jl/issues/9 [#10]: https://github.com/JuliaStats/RData.jl/issues/10 [#15]: https://github.com/JuliaStats/RData.jl/issues/15 +[#19]: https://github.com/JuliaStats/RData.jl/issues/19 +[JuliaStats/DataFrames.jl#1008]: https://github.com/JuliaStats/DataFrames.jl/pull/1008 [JuliaStats/DataFrames.jl#1031]: https://github.com/JuliaStats/DataFrames.jl/pull/1031 diff --git a/REQUIRE b/REQUIRE index 41080fc..7de741a 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,6 +1,5 @@ julia 0.5 -DataFrames 0.7 -DataArrays 0.3 +DataTables FileIO 0.1.2 GZip 0.2 Compat 0.17 diff --git a/appveyor.yml b/appveyor.yml index a9ae8a1..000a8de 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,6 +2,8 @@ environment: matrix: - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" diff --git a/src/RData.jl b/src/RData.jl index 1dd7a42..53475e8 100644 --- a/src/RData.jl +++ b/src/RData.jl @@ -2,9 +2,8 @@ __precompile__() module RData -using Compat, DataFrames, GZip, FileIO -import DataArrays: data -import DataFrames: identifier +using Compat, DataTables, GZip, FileIO +import DataTables: identifier import Compat: unsafe_string import FileIO: load diff --git a/src/convert.jl b/src/convert.jl index c0d0002..226591c 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -11,33 +11,48 @@ end ############################################################################## ## -## Conversion of intermediate R objects into DataArray and DataFrame objects +## Conversion of intermediate R objects into NullableArray and DataTable objects ## ############################################################################## -namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32) -namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32) -namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data))) +namask(rl::RLogicalVector) = [flag == R_NA_INT32 for flag in rl.data] +namask(ri::RIntegerVector) = [flag == R_NA_INT32 for flag in ri.data] +namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data)) # if re or im is NA, the whole complex number is NA # FIXME avoid temporary Vector{Bool} -namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]) +namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)] namask(rv::RNullableVector) = rv.na -DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv)) +function _julia_vector(rv::RVEC, force_nullable::Bool) + na_mask = namask(rv) + (force_nullable || any(na_mask)) ? NullableArray(rv.data, na_mask) : rv.data +end + +# convert R vector into either NullableArray +# or Array if force_nullable=false and there are no NAs +julia_vector(rv::RVEC, force_nullable::Bool) = _julia_vector(rv, force_nullable) + +# converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0 +na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v] + +# convert to [Nullable]CategoricalArray{String} if `ri`is a factor, +# or to [Nullable]Array{Int32} otherwise +function julia_vector(ri::RIntegerVector, force_nullable::Bool) + !isfactor(ri) && return _julia_vector(ri, force_nullable) # not a factor -function DataArrays.data(ri::RIntegerVector) - if !isfactor(ri) return DataArray(ri.data, namask(ri)) end - # convert factor into PooledDataArray - pool = getattr(ri, "levels", emptystrvec) - sz = length(pool) + # convert factor into [Nullable]CategoricalArray + rlevels = getattr(ri, "levels", emptystrvec) + sz = length(rlevels) REFTYPE = sz <= typemax(UInt8) ? UInt8 : sz <= typemax(UInt16) ? UInt16 : sz <= typemax(UInt32) ? UInt32 : UInt64 - dd = ri.data - dd[namask(ri)] = 0 - refs = convert(Vector{REFTYPE}, dd) - return PooledDataArray(DataArrays.RefArray(refs), pool) + # FIXME set ordered flag + refs = na2zero(REFTYPE, ri.data) + pool = CategoricalPool{String, REFTYPE}(rlevels) + (force_nullable || (findfirst(refs, zero(REFTYPE)) > 0)) ? + NullableCategoricalArray{String, 1, REFTYPE}(refs, pool) : + CategoricalArray{String, 1, REFTYPE}(refs, pool) end function sexp2julia(rex::RSEXPREC) @@ -46,36 +61,32 @@ function sexp2julia(rex::RSEXPREC) end function sexp2julia(rv::RVEC) - # FIXME dimnames - # FIXME forceDataArrays option to always convert to DataArray - nas = namask(rv) - hasna = any(nas) + # TODO dimnames? + # FIXME forceNullable option to always convert to NullableArray + jv = julia_vector(rv, false) if hasnames(rv) # if data has no NA, convert to simple Vector - return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv)) + return DictoVec(jv, names(rv)) else hasdims = hasdim(rv) if !hasdims && length(rv.data)==1 # scalar - # FIXME handle NAs - # if hasna - return rv.data[1] + return jv[1] elseif !hasdims # vectors - return hasna ? DataArray(rv.data, nas) : rv.data + return jv else # matrices and so on - dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...) - return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) : - reshape(rv.data, dims) + dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...) + return reshape(jv, dims) end end end function sexp2julia(rl::RList) if isdataframe(rl) - # FIXME remove Any type assertion workaround - DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl))) + # FIXME forceNullable option to always convert to NullableArray + DataTable(Any[julia_vector(col, true) for col in rl.data], map(identifier, names(rl))) elseif hasnames(rl) DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl)) else diff --git a/src/sxtypes.jl b/src/sxtypes.jl index 7bb3fb0..2a4774f 100644 --- a/src/sxtypes.jl +++ b/src/sxtypes.jl @@ -125,7 +125,7 @@ const RComplexVector = RVector{Complex128, CPLXSXP} """ immutable RNullableVector{T, S} <: RVEC{T, S} data::Vector{T} - na::BitVector # mask of NA elements + na::Vector{Bool} # mask of NA elements attr::Hash # collection of R object attributes end diff --git a/test/RDA.jl b/test/RDA.jl index 4666772..d16b207 100644 --- a/test/RDA.jl +++ b/test/RDA.jl @@ -1,6 +1,6 @@ module TestRDA using Base.Test - using DataFrames + using DataTables using RData using Compat @@ -15,7 +15,7 @@ module TestRDA testdir = dirname(@__FILE__) - df = DataFrame(num = [1.1, 2.2]) + df = DataTable(num = [1.1, 2.2]) @test isequal(sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]), df) @test isequal(load("$testdir/data/minimal.rda",convert=true)["df"], df) @test isequal(load("$testdir/data/minimal_ascii.rda")["df"], df) @@ -23,19 +23,19 @@ module TestRDA df[:int] = Int32[1, 2] df[:logi] = [true, false] df[:chr] = ["ab", "c"] - df[:factor] = pool(df[:chr]) + df[:factor] = categorical(df[:chr]) df[:cplx] = Complex128[1.1+0.5im, 1.0im] @test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df) @test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df) - df[2, :] = NA + df[2, :] = Nullable() append!(df, df[2, :]) df[3, :num] = NaN - df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN] + df[:, :cplx] = NullableVector([Nullable(), Complex128(1,NaN), NaN]) @test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df) # ASCII format saves NaN as NA - df[3, :num] = NA - df[:, :cplx] = @data [NA, NA, NA] + df[3, :num] = Nullable() + df[:, :cplx] = NullableVector{Complex128}(3) @test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df) rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"])) From fae1a590ded9c5c0a742c62f4a96528e50118b73 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 2 Oct 2016 00:44:42 +0200 Subject: [PATCH 3/7] temporarily require DataTables master for CI --- .travis.yml | 6 +++--- appveyor.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1e9eea5..11a43a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,8 @@ julia: notifications: email: false # uncomment the following lines to override the default test script -#script: -# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi -# - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("RData"); Pkg.test("RData"; coverage=true)' +script: + - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi + - julia --check-bounds=yes -e 'Pkg.clone(pwd()); Pkg.build("RData"); Pkg.checkout("DataTables", "master"); Pkg.test("RData"; coverage=true)' after_success: - julia -e 'cd(Pkg.dir("RData")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'; diff --git a/appveyor.yml b/appveyor.yml index 000a8de..2d9e258 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -36,7 +36,7 @@ build_script: # Need to convert from shallow to complete for Pkg.clone to work - IF EXIST .git\shallow (git fetch --unshallow) - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"RData\"); Pkg.build(\"RData\")" + Pkg.clone(pwd(), \"RData\"); Pkg.build(\"RData\"); Pkg.checkout(\"DataTables\", \"master\")" test_script: - C:\projects\julia\bin\julia -e "Pkg.test(\"RData\")" From 01ad191bd36085f9c93e90044cbdf9bbc253ea56 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 9 Mar 2017 23:05:01 +0100 Subject: [PATCH 4/7] convert logical vector to Vector{Bool} + tests --- NEWS.md | 1 + src/convert.jl | 17 +++++++++++------ test/RDA.jl | 10 +++++++--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index edfc4f7..b91e867 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ Switched from `DataFrames` to `DataTables`, dropped Julia v0.4 support * using `NullableArrays.jl` and `CategoricalArrays.jl` instead of `DataArrays.jl` ([#19], see [JuliaStats/DataFrames.jl#1008]) * Julia v0.4 not supported (`DataTables.jl` requires v0.5) +* R logical vectors converted to `Vector{Bool}` (instead of `Vector{Int32}`) ## RData v0.0.4 Release Notes diff --git a/src/convert.jl b/src/convert.jl index 226591c..20daca8 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -15,22 +15,27 @@ end ## ############################################################################## -namask(rl::RLogicalVector) = [flag == R_NA_INT32 for flag in rl.data] -namask(ri::RIntegerVector) = [flag == R_NA_INT32 for flag in ri.data] +namask(ri::RVector{Int32}) = [i == R_NA_INT32 for i in ri.data] namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data)) # if re or im is NA, the whole complex number is NA # FIXME avoid temporary Vector{Bool} namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)] namask(rv::RNullableVector) = rv.na -function _julia_vector(rv::RVEC, force_nullable::Bool) +function _julia_vector{T}(::Type{T}, rv::RVEC, force_nullable::Bool) na_mask = namask(rv) - (force_nullable || any(na_mask)) ? NullableArray(rv.data, na_mask) : rv.data + (force_nullable || any(na_mask)) ? NullableArray(convert(Vector{T}, rv.data), na_mask) : rv.data end # convert R vector into either NullableArray # or Array if force_nullable=false and there are no NAs -julia_vector(rv::RVEC, force_nullable::Bool) = _julia_vector(rv, force_nullable) +julia_vector(rv::RVEC, force_nullable::Bool) = _julia_vector(eltype(rv.data), rv, force_nullable) + +function julia_vector(rl::RLogicalVector, force_nullable::Bool) + v = Bool[flag != zero(eltype(rl.data)) for flag in rl.data] + na_mask = namask(rl) + (force_nullable || any(na_mask)) ? NullableArray(v, na_mask) : v +end # converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0 na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v] @@ -38,7 +43,7 @@ na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for # convert to [Nullable]CategoricalArray{String} if `ri`is a factor, # or to [Nullable]Array{Int32} otherwise function julia_vector(ri::RIntegerVector, force_nullable::Bool) - !isfactor(ri) && return _julia_vector(ri, force_nullable) # not a factor + !isfactor(ri) && return _julia_vector(eltype(ri.data), ri, force_nullable) # not a factor # convert factor into [Nullable]CategoricalArray rlevels = getattr(ri, "levels", emptystrvec) diff --git a/test/RDA.jl b/test/RDA.jl index d16b207..fb6879a 100644 --- a/test/RDA.jl +++ b/test/RDA.jl @@ -23,10 +23,14 @@ module TestRDA df[:int] = Int32[1, 2] df[:logi] = [true, false] df[:chr] = ["ab", "c"] - df[:factor] = categorical(df[:chr]) + df[:factor] = categorical(df[:chr], true) df[:cplx] = Complex128[1.1+0.5im, 1.0im] - @test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df) - @test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df) + rdf = sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]) + @test eltypes(rdf) == eltypes(df) + @test isequal(rdf, df) + rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]) + @test eltypes(rdf_ascii) == eltypes(df) + @test isequal(rdf_ascii, df) df[2, :] = Nullable() append!(df, df[2, :]) From 10d9294a823442ff453975b200ba9e6eb4deb42f Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jul 2017 14:16:53 +0000 Subject: [PATCH 5/7] update tests - use == instead of isequal() - explicitly make the columns nullable --- test/RDA.jl | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/test/RDA.jl b/test/RDA.jl index fb6879a..f5bd93c 100644 --- a/test/RDA.jl +++ b/test/RDA.jl @@ -15,32 +15,33 @@ module TestRDA testdir = dirname(@__FILE__) - df = DataTable(num = [1.1, 2.2]) - @test isequal(sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]), df) - @test isequal(load("$testdir/data/minimal.rda",convert=true)["df"], df) - @test isequal(load("$testdir/data/minimal_ascii.rda")["df"], df) - - df[:int] = Int32[1, 2] - df[:logi] = [true, false] - df[:chr] = ["ab", "c"] - df[:factor] = categorical(df[:chr], true) - df[:cplx] = Complex128[1.1+0.5im, 1.0im] + df = DataTable(num = NullableArray([1.1, 2.2])) + @test sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]) == df + @test load("$testdir/data/minimal.rda",convert=true)["df"] == df + @test load("$testdir/data/minimal_ascii.rda")["df"] == df + + df = DataTable(num = NullableArray([1.1, 2.2]), + int = NullableArray(Int32[1, 2]), + logi = NullableArray([true, false]), + chr = NullableArray(["ab", "c"]), + factor = categorical(NullableArray(["ab", "c"]), true), + cplx = NullableArray(Complex128[1.1+0.5im, 1.0im])) rdf = sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]) @test eltypes(rdf) == eltypes(df) - @test isequal(rdf, df) + @test rdf == df rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]) @test eltypes(rdf_ascii) == eltypes(df) - @test isequal(rdf_ascii, df) + @test rdf_ascii == df df[2, :] = Nullable() append!(df, df[2, :]) df[3, :num] = NaN - df[:, :cplx] = NullableVector([Nullable(), Complex128(1,NaN), NaN]) + df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN]) @test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df) # ASCII format saves NaN as NA df[3, :num] = Nullable() - df[:, :cplx] = NullableVector{Complex128}(3) - @test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df) + df[:, :cplx] = NullableArray{Complex128}(3) + @test sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]) == df rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"])) expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] From f53af1047e4b9de0f1d701b4c29939701dd1315c Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jul 2017 14:17:16 +0000 Subject: [PATCH 6/7] group tests into testsets --- test/RDA.jl | 113 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/test/RDA.jl b/test/RDA.jl index f5bd93c..0153f07 100644 --- a/test/RDA.jl +++ b/test/RDA.jl @@ -5,55 +5,72 @@ module TestRDA using Compat # check for Float64 NA - @test !RData.isna_float64(reinterpret(UInt64, 1.0)) - @test !RData.isna_float64(reinterpret(UInt64, NaN)) - @test !RData.isna_float64(reinterpret(UInt64, Inf)) - @test !RData.isna_float64(reinterpret(UInt64, -Inf)) - @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) - # check that alternative NA is also recognized (#10) - @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) + @testset "Detect R floating-point NAs" begin + @test !RData.isna_float64(reinterpret(UInt64, 1.0)) + @test !RData.isna_float64(reinterpret(UInt64, NaN)) + @test !RData.isna_float64(reinterpret(UInt64, Inf)) + @test !RData.isna_float64(reinterpret(UInt64, -Inf)) + @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64)) + # check that alternative NA is also recognized (#10) + @test RData.isna_float64(reinterpret(UInt64, RData.R_NA_FLOAT64 | ((Base.significand_mask(Float64) + 1) >> 1))) + end testdir = dirname(@__FILE__) + @testset "Reading minimal RData" begin + df = DataTable(num = NullableArray([1.1, 2.2])) + @test sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]) == df + @test load("$testdir/data/minimal.rda",convert=true)["df"] == df + @test load("$testdir/data/minimal_ascii.rda")["df"] == df + end - df = DataTable(num = NullableArray([1.1, 2.2])) - @test sexp2julia(load("$testdir/data/minimal.rda",convert=false)["df"]) == df - @test load("$testdir/data/minimal.rda",convert=true)["df"] == df - @test load("$testdir/data/minimal_ascii.rda")["df"] == df - - df = DataTable(num = NullableArray([1.1, 2.2]), - int = NullableArray(Int32[1, 2]), - logi = NullableArray([true, false]), - chr = NullableArray(["ab", "c"]), - factor = categorical(NullableArray(["ab", "c"]), true), - cplx = NullableArray(Complex128[1.1+0.5im, 1.0im])) - rdf = sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]) - @test eltypes(rdf) == eltypes(df) - @test rdf == df - rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]) - @test eltypes(rdf_ascii) == eltypes(df) - @test rdf_ascii == df - - df[2, :] = Nullable() - append!(df, df[2, :]) - df[3, :num] = NaN - df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN]) - @test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df) - # ASCII format saves NaN as NA - df[3, :num] = Nullable() - df[:, :cplx] = NullableArray{Complex128}(3) - @test sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]) == df - - rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"])) - expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] - @test rda_names == expected_names - rda_names = names(sexp2julia(load("$testdir/data/names_ascii.rda",convert=false)["df"])) - @test rda_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] - - rda_envs = load("$testdir/data/envs.rda",convert=false) - - rda_pairlists = load("$testdir/data/pairlists.rda",convert=false) - - rda_closures = load("$testdir/data/closures.rda",convert=false) - - rda_cmpfuns = load("$testdir/data/cmpfun.rda",convert=false) + @testset "Conversion to Julia types" begin + df = DataTable(num = NullableArray([1.1, 2.2]), + int = NullableArray(Int32[1, 2]), + logi = NullableArray([true, false]), + chr = NullableArray(["ab", "c"]), + factor = categorical(NullableArray(["ab", "c"]), true), + cplx = NullableArray(Complex128[1.1+0.5im, 1.0im])) + rdf = sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]) + @test eltypes(rdf) == eltypes(df) + @test rdf == df + rdf_ascii = sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]) + @test eltypes(rdf_ascii) == eltypes(df) + @test rdf_ascii == df + end + + @testset "NAs conversion" begin + df = DataTable(num = NullableArray([1.1, 2.2]), + int = NullableArray(Int32[1, 2]), + logi = NullableArray([true, false]), + chr = NullableArray(["ab", "c"]), + factor = categorical(NullableArray(["ab", "c"]), true), + cplx = NullableArray(Complex128[1.1+0.5im, 1.0im])) + df[2, :] = Nullable() + append!(df, df[2, :]) + df[3, :num] = NaN + df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN]) + @test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df) + # ASCII format saves NaN as NA + df[3, :num] = Nullable() + df[:, :cplx] = NullableArray{Complex128}(3) + @test sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]) == df + end + + @testset "Column names conversion" begin + rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"])) + expected_names = [:_end, :x!, :x1, :_B_C_, :x, :x_1] + @test rda_names == expected_names + rda_names = names(sexp2julia(load("$testdir/data/names_ascii.rda",convert=false)["df"])) + @test rda_names == [:_end, :x!, :x1, :_B_C_, :x, :x_1] + end + + @testset "Reading RDA with complex types (environments, closures etc)" begin + rda_envs = load("$testdir/data/envs.rda",convert=false) + + rda_pairlists = load("$testdir/data/pairlists.rda",convert=false) + + rda_closures = load("$testdir/data/closures.rda",convert=false) + + rda_cmpfuns = load("$testdir/data/cmpfun.rda",convert=false) + end end From 57aef24a88305ce6f2a69cfccbffa3d0f9c59ad6 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jul 2017 19:42:14 +0000 Subject: [PATCH 7/7] implement reviewer suggestions --- src/convert.jl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/convert.jl b/src/convert.jl index 20daca8..7c0c2da 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -18,7 +18,6 @@ end namask(ri::RVector{Int32}) = [i == R_NA_INT32 for i in ri.data] namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data)) # if re or im is NA, the whole complex number is NA -# FIXME avoid temporary Vector{Bool} namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)] namask(rv::RNullableVector) = rv.na @@ -38,12 +37,13 @@ function julia_vector(rl::RLogicalVector, force_nullable::Bool) end # converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0 -na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v] +# it's assumed that v fits into R +na2zero{R}(::Type{R}, v::Vector{Int32}) = [ifelse(x != R_NA_INT32, x % R, zero(R)) for x in v] -# convert to [Nullable]CategoricalArray{String} if `ri`is a factor, +# convert to [Nullable]CategoricalArray{String} if `ri` is a factor, # or to [Nullable]Array{Int32} otherwise function julia_vector(ri::RIntegerVector, force_nullable::Bool) - !isfactor(ri) && return _julia_vector(eltype(ri.data), ri, force_nullable) # not a factor + isfactor(ri) || return _julia_vector(eltype(ri.data), ri, force_nullable) # convert factor into [Nullable]CategoricalArray rlevels = getattr(ri, "levels", emptystrvec) @@ -55,9 +55,11 @@ function julia_vector(ri::RIntegerVector, force_nullable::Bool) # FIXME set ordered flag refs = na2zero(REFTYPE, ri.data) pool = CategoricalPool{String, REFTYPE}(rlevels) - (force_nullable || (findfirst(refs, zero(REFTYPE)) > 0)) ? - NullableCategoricalArray{String, 1, REFTYPE}(refs, pool) : - CategoricalArray{String, 1, REFTYPE}(refs, pool) + if force_nullable || (findfirst(refs, zero(REFTYPE)) > 0) + return NullableCategoricalArray{String, 1, REFTYPE}(refs, pool) + else + return CategoricalArray{String, 1, REFTYPE}(refs, pool) + end end function sexp2julia(rex::RSEXPREC)