Skip to content

Commit d79d1c7

Browse files
committed
update to DataFrames 0.8+
* drop Julia 0.4 support (since DataFrames requires Julia 0.5) * convert from using DataArrays to NullableArrays
1 parent 1d40335 commit d79d1c7

File tree

6 files changed

+54
-35
lines changed

6 files changed

+54
-35
lines changed

NEWS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## RData v0.1.0 Release Notes
2+
3+
Updated to DataFrames 0.9, dropped Julia v0.4
4+
5+
##### Changes
6+
* using NullableArrays.jl and CategoricalArrays.jl
7+
instead of DataArrays.jl ([#19], see [JuliaStats/DataFrames.jl#1008])
8+
* Julia v0.4 not supported (DataFrames.jl v0.9 requirements)
9+
110
## RData v0.0.4 Release Notes
211

312
Now the recommended way to load `.RData`/`.rda` files is by `FileIO.load()`.

REQUIRE

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
julia 0.4
2-
DataFrames 0.7
3-
DataArrays 0.3
1+
julia 0.5
2+
DataFrames 0.8+
43
FileIO 0.1.2
54
GZip 0.2
65
Compat 0.8

src/RData.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ __precompile__()
33
module RData
44

55
using Compat, DataFrames, GZip, FileIO
6-
import DataArrays: data
76
import DataFrames: identifier
87
import Compat: UTF8String, unsafe_string
98
import FileIO: load

src/convert.jl

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,71 +11,81 @@ end
1111

1212
##############################################################################
1313
##
14-
## Conversion of intermediate R objects into DataArray and DataFrame objects
14+
## Conversion of intermediate R objects into NullableArray and DataFrame objects
1515
##
1616
##############################################################################
1717

18-
namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32)
19-
namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32)
20-
namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data)))
18+
# NA mask of a logical vector: `true` wherever the stored value equals R_NA_INT32
namask(rl::RLogicalVector) = map(v -> v == R_NA_INT32, rl.data)
19+
# NA mask of an integer vector: `true` wherever the stored value equals R_NA_INT32
namask(ri::RIntegerVector) = map(v -> v == R_NA_INT32, ri.data)
20+
# NA mask of a numeric vector: each element is tested through its raw UInt64 bit pattern
namask(rn::RNumericVector) = [isna_float64(bits) for bits in reinterpret(UInt64, rn.data)]
2121
# if re or im is NA, the whole complex number is NA
2222
# FIXME avoid temporary Vector{Bool}
23-
namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)])
23+
namask(rc::RComplexVector) = map(z -> isna_float64(z.re) || isna_float64(z.im),
                                 reinterpret(Complex{UInt64}, rc.data))
2424
# RNullableVector carries its NA mask as a field, so return that field as-is
function namask(rv::RNullableVector)
    return rv.na
end
2525

26-
DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv))
26+
# convert R vector into NullableArray
27+
function _nullable(rv::RVEC)
    isnull_mask = namask(rv)  # `true` marks an NA element
    return NullableArray(rv.data, isnull_mask)
end
2728

28-
function DataArrays.data(ri::RIntegerVector)
29-
if !isfactor(ri) return DataArray(ri.data, namask(ri)) end
30-
# convert factor into PooledDataArray
29+
# converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0
30+
_zero_nas{R}(::Type{R}, v::Vector{Int32}) = map(x -> x == R_NA_INT32 ? zero(R) : R(x), v)
31+
32+
# convert R factor into NullableCategoricalArray{String}
33+
# TODO option to convert into Symbol etc?
34+
function _nullable(ri::RIntegerVector)
    if !isfactor(ri)
        # plain integer vector: wrap the data together with its NA mask
        return NullableArray(ri.data, namask(ri))
    end
    # factor: the "levels" attribute supplies the category labels
    lvls = getattr(ri, "levels", emptystrvec)
    nlvls = length(lvls)
    # pick the narrowest unsigned reference type whose maximum is at least
    # the number of levels
    if nlvls <= typemax(UInt8)
        RefT = UInt8
    elseif nlvls <= typemax(UInt16)
        RefT = UInt16
    elseif nlvls <= typemax(UInt32)
        RefT = UInt32
    else
        RefT = UInt64
    end
    # NA codes become reference 0 (see `_zero_nas`)
    refs = _zero_nas(RefT, ri.data)
    return NullableCategoricalArray{String, 1, RefT}(refs,
                                                     CategoricalPool{String, RefT}(lvls))
end
4246

4347
# Fallback for R objects that have no Julia conversion: emit a warning and
# return `nothing`.
sexp2julia(rex::RSEXPREC) = begin
    warn("Conversion of $(typeof(rex)) to Julia is not implemented")
    nothing
end
4751

52+
# FIXME remove once anynull(NullableCategoricalArray) is available
53+
# Return `true` if any element of `A` is null, i.e. if any reference code is zero.
# The check must be element-wise: `A.refs == zero(R)` compares the whole array
# with a scalar and is always `false`, which hides every null.
_anynull{T,N,R}(A::NullableCategoricalArray{T,N,R}) = any(ref -> ref == zero(R), A.refs)
54+
# For a NullableArray, simply forward to `anynull`
function _anynull(A::NullableArray)
    return anynull(A)
end
55+
56+
# convert nullable array without nulls into non-nullable array
57+
# `A` is expected to contain no nulls
58+
function _dropnonulls(A::NullableArray)
    # returns the `values` field itself, not a copy
    return A.values
end
59+
function _dropnonulls{T,N,R}(A::NullableCategoricalArray{T,N,R})
    # build the non-nullable counterpart from the same refs and pool
    return CategoricalArray{T,N,R}(A.refs, A.pool)
end
60+
4861
# Convert an R vector into a Julia object:
#   * named vector               -> DictoVec keyed by the R names
#   * length-1 vector, no "dim"  -> scalar (a Nullable if the element is NA)
#   * vector without "dim"       -> vector
#   * vector with "dim"          -> array reshaped to the R dimensions
# In every container case the nullable wrapper is dropped when the data holds
# no NA, so NA-free R data always maps to plain (non-nullable) Julia arrays.
function sexp2julia(rv::RVEC)
    # FIXME dimnames
    # FIXME forceNullable option to always convert to NullableArray
    jv = _nullable(rv)
    hasna = _anynull(jv)
    if hasnames(rv)
        # if data has no NA, convert to simple Vector
        return DictoVec(hasna ? jv : _dropnonulls(jv), names(rv))
    end
    hasdims = hasdim(rv)
    if !hasdims && length(rv.data)==1
        # scalar
        return hasna ? jv[1] : get(jv[1])
    elseif !hasdims
        # vectors
        return hasna ? jv : _dropnonulls(jv)
    else
        # matrices and so on; as for vectors, keep the nullable wrapper
        # only when there is at least one NA
        dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...)
        return reshape(hasna ? jv : _dropnonulls(jv), dims)
    end
end
7484

7585
function sexp2julia(rl::RList)
7686
if isdataframe(rl)
7787
# FIXME remove Any type assertion workaround
78-
DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl)))
88+
DataFrame(Any[_nullable(col) for col in rl.data], map(identifier, names(rl)))
7989
elseif hasnames(rl)
8090
DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
8191
else

src/sxtypes.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ typealias RComplexVector RVector{Complex128, CPLXSXP}
125125
"""
126126
immutable RNullableVector{T, S} <: RVEC{T, S}
127127
data::Vector{T}
128-
na::BitVector # mask of NA elements
128+
na::Vector{Bool} # mask of NA elements
129129
attr::Hash # collection of R object attributes
130130
end
131131

test/RDA.jl

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,21 @@ module TestRDA
2323
df[:int] = Int32[1, 2]
2424
df[:logi] = [true, false]
2525
df[:chr] = ["ab", "c"]
26-
df[:factor] = pool(df[:chr])
26+
df[:factor] = categorical(df[:chr])
2727
df[:cplx] = Complex128[1.1+0.5im, 1.0im]
2828
@test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df)
2929
@test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df)
3030

31-
df[2, :] = NA
31+
for col in DataFrames.columns(df)
32+
col[2] = Nullable{eltype(col)}() # FIXME nullify!() is not supported by CategoricalArrays
33+
end
3234
append!(df, df[2, :])
3335
df[3, :num] = NaN
34-
df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN]
36+
df[:, :cplx] = NullableArray{Complex128}(Nullable{Complex128}[Nullable{Complex128}(), Complex128(1,NaN), NaN])
3537
@test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df)
3638
# ASCII format saves NaN as NA
37-
df[3, :num] = NA
38-
df[:, :cplx] = @data [NA, NA, NA]
39+
df[3, :num] = Nullable{Complex128}()
40+
df[:, :cplx] = NullableArray{Complex128}(3)
3941
@test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)
4042

4143
rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"]))

0 commit comments

Comments
 (0)