Skip to content

Commit f03c9ce

Browse files
committed
update to DataFrames 0.8+
* drop Julia 0.4 support (since DataFrames requires Julia 0.5) * convert from using DataArrays to NullableArrays
1 parent 1d40335 commit f03c9ce

File tree

8 files changed

+59
-42
lines changed

8 files changed

+59
-42
lines changed

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ os:
33
- linux
44
- osx
55
julia:
6-
- 0.4
76
- 0.5
87
- nightly
98
notifications:

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
## RData v0.1.0 Release Notes
2+
3+
Updated to DataFrames 0.9, dropped Julia v0.4 support
4+
5+
##### Changes
6+
* using NullableArrays.jl and CategoricalArrays.jl
7+
instead of DataArrays.jl ([#19], see [JuliaStats/DataFrames.jl#1008])
8+
* Julia v0.4 is no longer supported (DataFrames.jl v0.9 requires Julia v0.5)
9+
110
## RData v0.0.4 Release Notes
211

312
Now the recommended way to load `.RData`/`.rda` files is by `FileIO.load()`.
@@ -15,5 +24,7 @@ Initial release based on `DataFrames.read_rda()` ([JuliaStats/DataFrames.jl#1031
1524
[#9]: https://github.com/JuliaStats/RData.jl/issues/9
1625
[#10]: https://github.com/JuliaStats/RData.jl/issues/10
1726
[#15]: https://github.com/JuliaStats/RData.jl/issues/15
27+
[#19]: https://github.com/JuliaStats/RData.jl/issues/19
1828

29+
[JuliaStats/DataFrames.jl#1008]: https://github.com/JuliaStats/DataFrames.jl/pull/1008
1930
[JuliaStats/DataFrames.jl#1031]: https://github.com/JuliaStats/DataFrames.jl/pull/1031

REQUIRE

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
julia 0.4
2-
DataFrames 0.7
3-
DataArrays 0.3
1+
julia 0.5
2+
DataFrames 0.8+
43
FileIO 0.1.2
54
GZip 0.2
65
Compat 0.8

appveyor.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
environment:
22
matrix:
3-
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
4-
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
53
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
64
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
75
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"

src/RData.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ __precompile__()
33
module RData
44

55
using Compat, DataFrames, GZip, FileIO
6-
import DataArrays: data
76
import DataFrames: identifier
87
import Compat: UTF8String, unsafe_string
98
import FileIO: load

src/convert.jl

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,48 @@ end
1111

1212
##############################################################################
1313
##
14-
## Conversion of intermediate R objects into DataArray and DataFrame objects
14+
## Conversion of intermediate R objects into NullableArray and DataFrame objects
1515
##
1616
##############################################################################
1717

18-
namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32)
19-
namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32)
20-
namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data)))
18+
namask(rl::RLogicalVector) = [flag == R_NA_INT32 for flag in rl.data]
19+
namask(ri::RIntegerVector) = [flag == R_NA_INT32 for flag in ri.data]
20+
namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data))
2121
# if re or im is NA, the whole complex number is NA
2222
# FIXME avoid temporary Vector{Bool}
23-
namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)])
23+
namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]
2424
namask(rv::RNullableVector) = rv.na
2525

26-
DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv))
26+
function _julia_vector(rv::RVEC, force_nullable::Bool)
27+
na_mask = namask(rv)
28+
(force_nullable || any(na_mask)) ? NullableArray(rv.data, na_mask) : rv.data
29+
end
30+
31+
# convert R vector into either NullableArray
32+
# or Array if force_nullable=false and there are no NAs
33+
julia_vector(rv::RVEC, force_nullable::Bool) = _julia_vector(rv, force_nullable)
34+
35+
# converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0
36+
na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v]
37+
38+
# convert to [Nullable]CategoricalArray{String} if `ri` is a factor,
39+
# or to [Nullable]Array{Int32} otherwise
40+
function julia_vector(ri::RIntegerVector, force_nullable::Bool)
41+
!isfactor(ri) && return _julia_vector(ri, force_nullable) # not a factor
2742

28-
function DataArrays.data(ri::RIntegerVector)
29-
if !isfactor(ri) return DataArray(ri.data, namask(ri)) end
30-
# convert factor into PooledDataArray
31-
pool = getattr(ri, "levels", emptystrvec)
32-
sz = length(pool)
43+
# convert factor into [Nullable]CategoricalArray
44+
rlevels = getattr(ri, "levels", emptystrvec)
45+
sz = length(rlevels)
3346
REFTYPE = sz <= typemax(UInt8) ? UInt8 :
3447
sz <= typemax(UInt16) ? UInt16 :
3548
sz <= typemax(UInt32) ? UInt32 :
3649
UInt64
37-
dd = ri.data
38-
dd[namask(ri)] = 0
39-
refs = convert(Vector{REFTYPE}, dd)
40-
return PooledDataArray(DataArrays.RefArray(refs), pool)
50+
# FIXME set ordered flag
51+
refs = na2zero(REFTYPE, ri.data)
52+
pool = CategoricalPool{String, REFTYPE}(rlevels)
53+
(force_nullable || (findfirst(refs, zero(REFTYPE)) > 0)) ?
54+
NullableCategoricalArray{String, 1, REFTYPE}(refs, pool) :
55+
CategoricalArray{String, 1, REFTYPE}(refs, pool)
4156
end
4257

4358
function sexp2julia(rex::RSEXPREC)
@@ -46,36 +61,32 @@ function sexp2julia(rex::RSEXPREC)
4661
end
4762

4863
function sexp2julia(rv::RVEC)
49-
# FIXME dimnames
50-
# FIXME forceDataArrays option to always convert to DataArray
51-
nas = namask(rv)
52-
hasna = any(nas)
64+
# TODO dimnames?
65+
# FIXME forceNullable option to always convert to NullableArray
66+
jv = julia_vector(rv, false)
5367
if hasnames(rv)
5468
# if data has no NA, convert to simple Vector
55-
return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv))
69+
return DictoVec(jv, names(rv))
5670
else
5771
hasdims = hasdim(rv)
5872
if !hasdims && length(rv.data)==1
5973
# scalar
60-
# FIXME handle NAs
61-
# if hasna
62-
return rv.data[1]
74+
return jv[1]
6375
elseif !hasdims
6476
# vectors
65-
return hasna ? DataArray(rv.data, nas) : rv.data
77+
return jv
6678
else
6779
# matrices and so on
68-
dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...)
69-
return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) :
70-
reshape(rv.data, dims)
80+
dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...)
81+
return reshape(jv, dims)
7182
end
7283
end
7384
end
7485

7586
function sexp2julia(rl::RList)
7687
if isdataframe(rl)
77-
# FIXME remove Any type assertion workaround
78-
DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl)))
88+
# FIXME forceNullable option to always convert to NullableArray
89+
DataFrame(Any[julia_vector(col, true) for col in rl.data], map(identifier, names(rl)))
7990
elseif hasnames(rl)
8091
DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
8192
else

src/sxtypes.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ typealias RComplexVector RVector{Complex128, CPLXSXP}
125125
"""
126126
immutable RNullableVector{T, S} <: RVEC{T, S}
127127
data::Vector{T}
128-
na::BitVector # mask of NA elements
128+
na::Vector{Bool} # mask of NA elements
129129
attr::Hash # collection of R object attributes
130130
end
131131

test/RDA.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ module TestRDA
2323
df[:int] = Int32[1, 2]
2424
df[:logi] = [true, false]
2525
df[:chr] = ["ab", "c"]
26-
df[:factor] = pool(df[:chr])
26+
df[:factor] = categorical(df[:chr])
2727
df[:cplx] = Complex128[1.1+0.5im, 1.0im]
2828
@test isequal(sexp2julia(load("$testdir/data/types.rda",convert=false)["df"]), df)
2929
@test isequal(sexp2julia(load("$testdir/data/types_ascii.rda",convert=false)["df"]), df)
3030

31-
df[2, :] = NA
31+
df[2, :] = Nullable()
3232
append!(df, df[2, :])
3333
df[3, :num] = NaN
34-
df[:, :cplx] = @data [NA, @compat(Complex128(1,NaN)), NaN]
34+
df[:, :cplx] = NullableArray([Nullable(), Complex128(1,NaN), NaN])
3535
@test isequal(sexp2julia(load("$testdir/data/NAs.rda",convert=false)["df"]), df)
3636
# ASCII format saves NaN as NA
37-
df[3, :num] = NA
38-
df[:, :cplx] = @data [NA, NA, NA]
37+
df[3, :num] = Nullable()
38+
df[:, :cplx] = NullableArray{Complex128}(3)
3939
@test isequal(sexp2julia(load("$testdir/data/NAs_ascii.rda",convert=false)["df"]), df)
4040

4141
rda_names = names(sexp2julia(load("$testdir/data/names.rda",convert=false)["df"]))

0 commit comments

Comments
 (0)