|
11 | 11 |
|
12 | 12 | ##############################################################################
|
13 | 13 | ##
|
14 |
| -## Conversion of intermediate R objects into DataArray and DataFrame objects |
| 14 | +## Conversion of intermediate R objects into NullableArray and DataFrame objects |
15 | 15 | ##
|
16 | 16 | ##############################################################################
|
17 | 17 |
|
18 |
| -namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32) |
19 |
| -namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32) |
20 |
| -namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data))) |
| 18 | +namask(rl::RLogicalVector) = [flag == R_NA_INT32 for flag in rl.data] |
| 19 | +namask(ri::RIntegerVector) = [flag == R_NA_INT32 for flag in ri.data] |
| 20 | +namask(rn::RNumericVector) = map(isna_float64, reinterpret(UInt64, rn.data)) |
21 | 21 | # if re or im is NA, the whole complex number is NA
|
22 | 22 | # FIXME avoid temporary Vector{Bool}
|
23 |
| -namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]) |
| 23 | +namask(rc::RComplexVector) = [isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)] |
24 | 24 | namask(rv::RNullableVector) = rv.na
|
25 | 25 |
|
26 |
| -DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv)) |
| 26 | +# convert R vector into NullableArray |
| 27 | +nullable_vector(rv::RVEC) = NullableArray(rv.data, namask(rv)) |
27 | 28 |
|
28 |
| -function DataArrays.data(ri::RIntegerVector) |
29 |
| - if !isfactor(ri) return DataArray(ri.data, namask(ri)) end |
30 |
| - # convert factor into PooledDataArray |
| 29 | +# converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0 |
| 30 | +na2zero{R}(::Type{R}, v::Vector{Int32}) = [x != R_NA_INT32 ? R(x) : zero(R) for x in v] |
| 31 | + |
| 32 | +# convert R factor into NullableCategoricalArray{String} |
| 33 | +function nullable_vector(ri::RIntegerVector) |
| 34 | + isfactor(ri) || return NullableArray(ri.data, namask(ri)) # not a factor |
| 35 | + # convert factor into NullableCategoricalArray |
31 | 36 | pool = getattr(ri, "levels", emptystrvec)
|
32 | 37 | sz = length(pool)
|
33 | 38 | REFTYPE = sz <= typemax(UInt8) ? UInt8 :
|
34 | 39 | sz <= typemax(UInt16) ? UInt16 :
|
35 | 40 | sz <= typemax(UInt32) ? UInt32 :
|
36 | 41 | UInt64
|
37 |
| - dd = ri.data |
38 |
| - dd[namask(ri)] = 0 |
39 |
| - refs = convert(Vector{REFTYPE}, dd) |
40 |
| - return PooledDataArray(DataArrays.RefArray(refs), pool) |
| 42 | + # FIXME set ordered flag |
| 43 | + return NullableCategoricalArray{String, 1, REFTYPE}(na2zero(REFTYPE, ri.data), |
| 44 | + CategoricalPool{String, REFTYPE}(pool)) |
41 | 45 | end
|
42 | 46 |
|
43 | 47 | function sexp2julia(rex::RSEXPREC)
|
44 | 48 | warn("Conversion of $(typeof(rex)) to Julia is not implemented")
|
45 | 49 | return nothing
|
46 | 50 | end
|
47 | 51 |
|
| 52 | +# FIXME remove when anynull(NullableCategoricalArray) would be available |
| 53 | +_anynull{T,N,R}(A::NullableCategoricalArray{T,N,R}) = any(r -> r == zero(R), A.refs) |
| 54 | +_anynull(A::NullableArray) = anynull(A) |
| 55 | + |
| 56 | +# convert nullable array without nulls into non-nullable array |
| 57 | +# `A` is expected to contain no nulls |
| 58 | +_nonnullable(A::NullableArray) = A.values |
| 59 | +_nonnullable{T,N,R}(A::NullableCategoricalArray{T,N,R}) = CategoricalArray{T,N,R}(A.refs, A.pool) |
| 60 | + |
48 | 61 | function sexp2julia(rv::RVEC)
|
49 |
| - # FIXME dimnames |
50 |
| - # FIXME forceDataArrays option to always convert to DataArray |
51 |
| - nas = namask(rv) |
52 |
| - hasna = any(nas) |
| 62 | + # TODO dimnames? |
| 63 | + # FIXME forceNullable option to always convert to NullableArray |
| 64 | + jv = nullable_vector(rv) |
| 65 | + hasna = _anynull(jv) |
53 | 66 | if hasnames(rv)
|
54 | 67 | # if data has no NA, convert to simple Vector
|
55 |
| - return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv)) |
| 68 | + return DictoVec(hasna ? jv : _nonnullable(jv), names(rv)) |
56 | 69 | else
|
57 | 70 | hasdims = hasdim(rv)
|
58 | 71 | if !hasdims && length(rv.data)==1
|
59 | 72 | # scalar
|
60 |
| - # FIXME handle NAs |
61 |
| - # if hasna |
62 |
| - return rv.data[1] |
| 73 | + return hasna ? jv[1] : get(jv[1]) |
63 | 74 | elseif !hasdims
|
64 | 75 | # vectors
|
65 |
| - return hasna ? DataArray(rv.data, nas) : rv.data |
| 76 | + return hasna ? jv : _nonnullable(jv) |
66 | 77 | else
|
67 | 78 | # matrices and so on
|
68 |
| - dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...) |
69 |
| - return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) : |
70 |
| - reshape(rv.data, dims) |
| 79 | + dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...) |
| 80 | + return reshape(jv, dims) |
71 | 81 | end
|
72 | 82 | end
|
73 | 83 | end
|
74 | 84 |
|
75 | 85 | function sexp2julia(rl::RList)
|
76 | 86 | if isdataframe(rl)
|
77 |
| - # FIXME remove Any type assertion workaround |
78 |
| - DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl))) |
| 87 | + DataFrame(Any[nullable_vector(col) for col in rl.data], map(identifier, names(rl))) |
79 | 88 | elseif hasnames(rl)
|
80 | 89 | DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
|
81 | 90 | else
|
|
0 commit comments