Skip to content

Commit 5f14a8b

Browse files
committed
update to DataFrames 0.8+
* drop Julia 0.4 support (since DataFrames requires Julia 0.5)
* convert from using DataArrays to NullableArrays
1 parent 1d40335 commit 5f14a8b

File tree

8 files changed

+55
-40
lines changed

8 files changed

+55
-40
lines changed

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ os:
33
- linux
44
- osx
55
julia:
6-
- 0.4
76
- 0.5
87
- nightly
98
notifications:

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## RData v0.1.0 Release Notes
2+
3+
Updated to DataFrames 0.9; dropped support for Julia v0.4.
4+
5+
##### Changes
6+
* using NullableArrays.jl and CategoricalArrays.jl
7+
instead of DataArrays.jl ([#19], see [JuliaStats/DataFrames.jl#1008])
8+
* Julia v0.4 is no longer supported (DataFrames.jl v0.9 requires Julia v0.5)
9+
110
## RData v0.0.4 Release Notes
211

312
The recommended way to load `.RData`/`.rda` files is now `FileIO.load()`.
@@ -15,5 +24,7 @@ Initial release based on `DataFrames.read_rda()` ([JuliaStats/DataFrames.jl#1031
1524
[#9]: https://github.com/JuliaStats/RData.jl/issues/9
1625
[#10]: https://github.com/JuliaStats/RData.jl/issues/10
1726
[#15]: https://github.com/JuliaStats/RData.jl/issues/15
27+
[#19]: https://github.com/JuliaStats/RData.jl/issues/19
1828

29+
[JuliaStats/DataFrames.jl#1008]: https://github.com/JuliaStats/DataFrames.jl/pull/1008
1930
[JuliaStats/DataFrames.jl#1031]: https://github.com/JuliaStats/DataFrames.jl/pull/1031

REQUIRE

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
julia 0.4
2-
DataFrames 0.7
3-
DataArrays 0.3
1+
julia 0.5
2+
DataFrames 0.8+
43
FileIO 0.1.2
54
GZip 0.2
65
Compat 0.8

appveyor.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
environment:
22
matrix:
3-
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
4-
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
53
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
64
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
75
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"

src/RData.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ __precompile__()
33
module RData
44

55
using Compat, DataFrames, GZip, FileIO
6-
import DataArrays: data
76
import DataFrames: identifier
87
import Compat: UTF8String, unsafe_string
98
import FileIO: load

src/convert.jl

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,71 +11,80 @@ end
1111

1212
##############################################################################
1313
##
14-
## Conversion of intermediate R objects into DataArray and DataFrame objects
14+
## Conversion of intermediate R objects into NullableArray and DataFrame objects
1515
##
1616
##############################################################################
1717

18-
namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32)
19-
namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32)
20-
namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data)))
18+
namask(rl::RLogicalVector) = [flag == R_NA_INT32 for flag in rl.data]
19+
namask(ri::RIntegerVector) = [flag == R_NA_INT32 for flag in ri.data]
20+
namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data))
2121
# if re or im is NA, the whole complex number is NA
2222
# FIXME avoid temporary Vector{Bool}
23-
namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)])
23+
namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]
2424
namask(rv::RNullableVector) = rv.na
2525

26-
DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv))
26+
# convert R vector into NullableArray
27+
nullable_vector(rv::RVEC) = NullableArray(rv.data, namask(rv))
2728

28-
function DataArrays.data(ri::RIntegerVector)
29-
if !isfactor(ri) return DataArray(ri.data, namask(ri)) end
30-
# convert factor into PooledDataArray
29+
# converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0
30+
na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v]
31+
32+
# convert R integer vector into NullableArray, or R factor into NullableCategoricalArray{String}
33+
function nullable_vector(ri::RIntegerVector)
34+
isfactor(ri) || return NullableArray(ri.data, namask(ri)) # not a factor
35+
# convert factor into NullableCategoricalArray
3136
pool = getattr(ri, "levels", emptystrvec)
3237
sz = length(pool)
3338
REFTYPE = sz <= typemax(UInt8) ? UInt8 :
3439
sz <= typemax(UInt16) ? UInt16 :
3540
sz <= typemax(UInt32) ? UInt32 :
3641
UInt64
37-
dd = ri.data
38-
dd[namask(ri)] = 0
39-
refs = convert(Vector{REFTYPE}, dd)
40-
return PooledDataArray(DataArrays.RefArray(refs), pool)
42+
# FIXME set ordered flag
43+
return NullableCategoricalArray{String, 1, REFTYPE}(na2zero(REFTYPE, ri.data),
44+
CategoricalPool{String, REFTYPE}(pool))
4145
end
4246

4347
function sexp2julia(rex::RSEXPREC)
4448
warn("Conversion of $(typeof(rex)) to Julia is not implemented")
4549
return nothing
4650
end
4751

52+
# FIXME remove once anynull(NullableCategoricalArray) is available
53+
_anynull{T,N,R}(A::NullableCategoricalArray{T,N,R}) = any(r -> r == zero(R), A.refs)
54+
_anynull(A::NullableArray) = anynull(A)
55+
56+
# convert nullable array without nulls into non-nullable array
57+
# `A` is expected to contain no nulls
58+
_nonnullable(A::NullableArray) = A.values
59+
_nonnullable{T,N,R}(A::NullableCategoricalArray{T,N,R}) = CategoricalArray{T,N,R}(A.refs, A.pool)
60+
4861
function sexp2julia(rv::RVEC)
49-
# FIXME dimnames
50-
# FIXME forceDataArrays option to always convert to DataArray
51-
nas = namask(rv)
52-
hasna = any(nas)
62+
# TODO dimnames?
63+
# FIXME forceNullable option to always convert to NullableArray
64+
jv = nullable_vector(rv)
65+
hasna = _anynull(jv)
5366
if hasnames(rv)
5467
# if data has no NA, convert to simple Vector
55-
return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv))
68+
return DictoVec(hasna ? jv : _nonnullable(jv), names(rv))
5669
else
5770
hasdims = hasdim(rv)
5871
if !hasdims && length(rv.data)==1
5972
# scalar
60-
# FIXME handle NAs
61-
# if hasna
62-
return rv.data[1]
73+
return hasna ? jv[1] : get(jv[1])
6374
elseif !hasdims
6475
# vectors
65-
return hasna ? DataArray(rv.data, nas) : rv.data
76+
return hasna ? jv : _nonnullable(jv)
6677
else
6778
# matrices and so on
68-
dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...)
69-
return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) :
70-
reshape(rv.data, dims)
79+
dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...)
80+
return reshape(jv, dims)
7181
end
7282
end
7383
end
7484

7585
function sexp2julia(rl::RList)
7686
if isdataframe(rl)
77-
# FIXME remove Any type assertion workaround
78-
DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl)))
87+
DataFrame(Any[nullable_vector(col) for col in rl.data], map(identifier, names(rl)))
7988
elseif hasnames(rl)
8089
DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
8190
else

src/sxtypes.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ typealias RComplexVector RVector{Complex128, CPLXSXP}
125125
"""
126126
immutable RNullableVector{T, S} <: RVEC{T, S}
127127
data::Vector{T}
128-
na::BitVector # mask of NA elements
128+
na::Vector{Bool} # mask of NA elements
129129
attr::Hash # collection of R object attributes
130130
end
131131

test/RDA.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ module TestRDA
2323
df[:int] = Int32[1, 2]
2424
df[:logi] = [true, false]
2525
df[:chr] = ["ab", "c"]
26-
df[:factor] = pool(df[:chr])
26+
df[:factor] = categorical(df[:chr])
2727
df[:cplx] = Complex128[1.1+0.5im, 1.0im]
2828
@test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df)
2929
@test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df)
3030

31-
df[2, :] = NA
31+
df[2, :] = Nullable()
3232
append!(df, df[2, :])
3333
df[3, :num] = NaN
34-
df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN]
34+
df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN])
3535
@test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df)
3636
# ASCII format saves NaN as NA
37-
df[3, :num] = NA
38-
df[:, :cplx] = @data [NA, NA, NA]
37+
df[3, :num] = Nullable()
38+
df[:, :cplx] = NullableArray{Complex128}(3)
3939
@test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)
4040

4141
rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"]))

0 commit comments

Comments
 (0)