|
3 | 3 |
|
4 | 4 | function Base.convert(::Type{Hash}, pl::RPairList)
|
5 | 5 | res = Hash()
|
6 |
| - for i in 1:length(pl.items) |
7 |
| - setindex!(res, pl.items[i], pl.tags[i]) |
| 6 | + for i in eachindex(pl.items) |
| 7 | + @inbounds setindex!(res, pl.items[i], pl.tags[i]) |
8 | 8 | end
|
9 | 9 | res
|
10 | 10 | end
|
11 | 11 |
|
12 | 12 | ##############################################################################
|
13 | 13 | ##
|
14 |
| -## Conversion of intermediate R objects into DataArray and DataFrame objects |
| 14 | +## Conversion of intermediate R objects into Vector{T} and DataFrame objects |
15 | 15 | ##
|
16 | 16 | ##############################################################################
|
17 | 17 |
|
18 |
| -namask(rl::RLogicalVector) = BitArray(rl.data .== R_NA_INT32) |
19 |
| -namask(ri::RIntegerVector) = BitArray(ri.data .== R_NA_INT32) |
20 |
| -namask(rn::RNumericVector) = BitArray(map(isna_float64, reinterpret(UInt64, rn.data))) |
| 18 | +isna(x::Int32) = x == R_NA_INT32 |
| 19 | +isna(x::Float64) = isna_float64(reinterpret(UInt64, x)) |
21 | 20 | # if re or im is NA, the whole complex number is NA
|
22 |
| -# FIXME avoid temporary Vector{Bool} |
23 |
| -namask(rc::RComplexVector) = BitArray([isna_float64(v.re) || isna_float64(v.im) for v in reinterpret(Complex{UInt64}, rc.data)]) |
24 |
| -namask(rv::RNullableVector) = rv.na |
| 21 | +isna(x::Complex128) = isna(real(x)) || isna(imag(x)) |
25 | 22 |
|
26 |
| -DataArrays.data(rv::RVEC) = DataArray(rv.data, namask(rv)) |
| 23 | +# convert R vector into Vector holding elements of type T |
| 24 | +# if force_missing is true, the result is always Vector{Union{T,Missing}}, |
| 25 | +# otherwise it's Vector{T} if `rv` doesn't contain NAs |
| 26 | +function jlvec(::Type{T}, rv::RVEC, force_missing::Bool=true) where T |
| 27 | + anyna = any(isna, rv.data) |
| 28 | + if force_missing || anyna |
| 29 | + res = convert(Vector{Union{T,Missing}}, rv.data) |
| 30 | + if anyna |
| 31 | + @inbounds for (i,x) in enumerate(rv.data) |
| 32 | + isna(x) && (res[i] = missing) |
| 33 | + end |
| 34 | + end |
| 35 | + return res |
| 36 | + else |
| 37 | + return convert(Vector{T}, rv.data) |
| 38 | + end |
| 39 | +end |
| 40 | + |
| 41 | +# convert R nullable vector (has an explicit NA mask) into Vector{T[?]} |
| 42 | +function jlvec(::Type{T}, rv::RNullableVector{R}, force_missing::Bool=true) where {T, R} |
| 43 | + anyna = any(rv.na) |
| 44 | + if force_missing || anyna |
| 45 | + res = convert(Vector{Union{T,Missing}}, rv.data) |
| 46 | + anyna && @inbounds res[rv.na] = missing |
| 47 | + return res |
| 48 | + else |
| 49 | + return convert(Vector{T}, rv.data) |
| 50 | + end |
| 51 | +end |
| 52 | + |
| 53 | +# convert R vector into Vector of appropriate type |
| 54 | +jlvec(rv::RVEC, force_missing::Bool=true) = jlvec(eltype(rv.data), rv, force_missing) |
27 | 55 |
|
28 |
| -function DataArrays.data(ri::RIntegerVector) |
29 |
| - if !isfactor(ri) return DataArray(ri.data, namask(ri)) end |
30 |
| - # convert factor into PooledDataArray |
31 |
| - pool = getattr(ri, "levels", emptystrvec) |
32 |
| - sz = length(pool) |
| 56 | +# convert R logical vector (uses Int32 to store values) into Vector{Bool[?]} |
| 57 | +function jlvec(rl::RLogicalVector, force_missing::Bool=true) |
| 58 | + anyna = any(isna, rl.data) |
| 59 | + if force_missing || anyna |
| 60 | + return Union{Bool,Missing}[ifelse(isna(x), missing, x != 0) for x in rl.data] |
| 61 | + else |
| 62 | + return Bool[x != 0 for x in rl.data] |
| 63 | + end |
| 64 | +end |
| 65 | + |
| 66 | +# kernel method that converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0 |
| 67 | +# it's assumed that every non-NA value of v fits into R |
| 68 | +na2zero(::Type{R}, v::Vector{Int32}) where R = |
| 69 | + [ifelse(!isna(x), x % R, zero(R)) for x in v] |
| 70 | + |
| 71 | +# convert to CategoricalVector{String[?]} if `ri` is a factor, |
| 72 | +# or to Vector{Int32[?]} otherwise |
| 73 | +function jlvec(ri::RIntegerVector, force_missing::Bool=true) |
| 74 | + isfactor(ri) || return jlvec(eltype(ri.data), ri, force_missing) |
| 75 | + |
| 76 | + rlevels = getattr(ri, "levels", emptystrvec) |
| 77 | + sz = length(rlevels) |
33 | 78 | REFTYPE = sz <= typemax(UInt8) ? UInt8 :
|
34 | 79 | sz <= typemax(UInt16) ? UInt16 :
|
35 | 80 | sz <= typemax(UInt32) ? UInt32 :
|
36 | 81 | UInt64
|
37 |
| - dd = ri.data |
38 |
| - dd[namask(ri)] = 0 |
39 |
| - refs = convert(Vector{REFTYPE}, dd) |
40 |
| - return PooledDataArray(DataArrays.RefArray(refs), pool) |
| 82 | + # FIXME set ordered flag |
| 83 | + refs = na2zero(REFTYPE, ri.data) |
| 84 | + anyna = any(iszero, refs) |
| 85 | + pool = CategoricalPool{String, REFTYPE}(rlevels) |
| 86 | + if force_missing || anyna |
| 87 | + return CategoricalArray{Union{String, Missing}, 1}(refs, pool) |
| 88 | + else |
| 89 | + return CategoricalArray{String, 1}(refs, pool) |
| 90 | + end |
41 | 91 | end
|
42 | 92 |
|
43 |
| -# convert R logical vector (uses Int32 to store values) into DataVector{Bool} |
44 |
| -DataArrays.data(rl::RLogicalVector) = |
45 |
| - return DataArray(Bool[x != 0 for x in rl.data], namask(rl)) |
46 |
| - |
47 | 93 | function sexp2julia(rex::RSEXPREC)
|
48 | 94 | warn("Conversion of $(typeof(rex)) to Julia is not implemented")
|
49 | 95 | return nothing
|
50 | 96 | end
|
51 | 97 |
|
52 | 98 | function sexp2julia(rv::RVEC)
|
53 |
| - # FIXME dimnames |
54 |
| - # FIXME forceDataArrays option to always convert to DataArray |
55 |
| - nas = namask(rv) |
56 |
| - hasna = any(nas) |
| 99 | + # TODO dimnames? |
| 100 | + # FIXME add force_missing option to control whether to always convert to Union{T, Missing} |
| 101 | + jv = jlvec(rv, false) |
57 | 102 | if hasnames(rv)
|
58 | 103 | # if data has no NA, convert to simple Vector
|
59 |
| - return DictoVec(hasna ? DataArray(rv.data, nas) : rv.data, names(rv)) |
| 104 | + return DictoVec(jv, names(rv)) |
60 | 105 | else
|
61 | 106 | hasdims = hasdim(rv)
|
62 | 107 | if !hasdims && length(rv.data)==1
|
63 | 108 | # scalar
|
64 |
| - # FIXME handle NAs |
65 |
| - # if hasna |
66 |
| - return rv.data[1] |
| 109 | + return jv[1] |
67 | 110 | elseif !hasdims
|
68 | 111 | # vectors
|
69 |
| - return hasna ? DataArray(rv.data, nas) : rv.data |
| 112 | + return jv |
70 | 113 | else
|
71 | 114 | # matrices and so on
|
72 |
| - dims = tuple(convert(Vector{Int64}, getattr(rv, "dim"))...) |
73 |
| - return hasna ? DataArray(reshape(rv.data, dims), reshape(nas, dims)) : |
74 |
| - reshape(rv.data, dims) |
| 115 | + dims = tuple(convert(Vector{Int}, getattr(rv, "dim"))...) |
| 116 | + return reshape(jv, dims) |
75 | 117 | end
|
76 | 118 | end
|
77 | 119 | end
|
78 | 120 |
|
79 | 121 | function sexp2julia(rl::RList)
|
80 | 122 | if isdataframe(rl)
|
81 |
| - # FIXME remove Any type assertion workaround |
82 |
| - DataFrame(Any[data(col) for col in rl.data], map(identifier, names(rl))) |
| 123 | + # FIXME add force_missing option to control whether to always convert to Union{T, Missing} |
| 124 | + DataFrame(Any[jlvec(col, false) for col in rl.data], identifier.(names(rl))) |
83 | 125 | elseif hasnames(rl)
|
84 | 126 | DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl))
|
85 | 127 | else
|
|
0 commit comments