11
11
12
12
# #############################################################################
13
13
# #
14
- # # Conversion of intermediate R objects into DataArray and DataFrame objects
14
+ # # Conversion of intermediate R objects into NullableArray and DataFrame objects
15
15
# #
16
16
# #############################################################################
17
17
18
- namask (rl:: RLogicalVector ) = BitArray (rl . data . == R_NA_INT32)
19
- namask (ri:: RIntegerVector ) = BitArray (ri . data . == R_NA_INT32)
20
- namask (rn:: RNumericVector ) = BitArray ( map (isna_float64, reinterpret (UInt64, rn. data) ))
18
+ namask (rl:: RLogicalVector ) = [flag == R_NA_INT32 for flag in rl . data]
19
+ namask (ri:: RIntegerVector ) = [flag == R_NA_INT32 for flag in ri . data]
20
+ namask (rn:: RNumericVector ) = map (isna_float64, reinterpret (UInt64, rn. data))
21
21
# if re or im is NA, the whole complex number is NA
22
22
# FIXME avoid temporary Vector{Bool}
23
- namask (rc:: RComplexVector ) = BitArray ( [isna_float64 (v. re) || isna_float64 (v. im) for v in reinterpret (Complex{UInt64}, rc. data)])
23
+ namask (rc:: RComplexVector ) = [isna_float64 (v. re) || isna_float64 (v. im) for v in reinterpret (Complex{UInt64}, rc. data)]
24
24
namask (rv:: RNullableVector ) = rv. na
25
25
26
- DataArrays. data (rv:: RVEC ) = DataArray (rv. data, namask (rv))
26
+ function _julia_vector (rv:: RVEC , force_nullable:: Bool )
27
+ na_mask = namask (rv)
28
+ (force_nullable || any (na_mask)) ? NullableArray (rv. data, na_mask) : rv. data
29
+ end
30
+
31
+ # convert R vector into either a NullableArray,
32
+ # or a plain Array if force_nullable=false and there are no NAs
33
+ julia_vector (rv:: RVEC , force_nullable:: Bool ) = _julia_vector (rv, force_nullable)
34
+
35
+ # converts Vector{Int32} into Vector{R} replacing R_NA_INT32 with 0
36
+ na2zero {R} (:: Type{R} , v:: Vector{Int32} ) = [x != R_NA_INT32 ? R (x) : zero (R) for x in v]
37
+
38
+ # convert to [Nullable]CategoricalArray{String} if `ri` is a factor,
39
+ # or to [Nullable]Array{Int32} otherwise
40
+ function julia_vector (ri:: RIntegerVector , force_nullable:: Bool )
41
+ ! isfactor (ri) && return _julia_vector (ri, force_nullable) # not a factor
27
42
28
- function DataArrays. data (ri:: RIntegerVector )
29
- if ! isfactor (ri) return DataArray (ri. data, namask (ri)) end
30
- # convert factor into PooledDataArray
31
- pool = getattr (ri, " levels" , emptystrvec)
32
- sz = length (pool)
43
+ # convert factor into [Nullable]CategoricalArray
44
+ rlevels = getattr (ri, " levels" , emptystrvec)
45
+ sz = length (rlevels)
33
46
REFTYPE = sz <= typemax (UInt8) ? UInt8 :
34
47
sz <= typemax (UInt16) ? UInt16 :
35
48
sz <= typemax (UInt32) ? UInt32 :
36
49
UInt64
37
- dd = ri. data
38
- dd[namask (ri)] = 0
39
- refs = convert (Vector{REFTYPE}, dd)
40
- return PooledDataArray (DataArrays. RefArray (refs), pool)
50
+ # FIXME set ordered flag
51
+ refs = na2zero (REFTYPE, ri. data)
52
+ pool = CategoricalPool {String, REFTYPE} (rlevels)
53
+ (force_nullable || (findfirst (refs, zero (REFTYPE)) > 0 )) ?
54
+ NullableCategoricalArray {String, 1, REFTYPE} (refs, pool) :
55
+ CategoricalArray {String, 1, REFTYPE} (refs, pool)
41
56
end
42
57
43
58
function sexp2julia (rex:: RSEXPREC )
@@ -46,36 +61,32 @@ function sexp2julia(rex::RSEXPREC)
46
61
end
47
62
48
63
function sexp2julia (rv:: RVEC )
49
- # FIXME dimnames
50
- # FIXME forceDataArrays option to always convert to DataArray
51
- nas = namask (rv)
52
- hasna = any (nas)
64
+ # TODO dimnames?
65
+ # FIXME forceNullable option to always convert to NullableArray
66
+ jv = julia_vector (rv, false )
53
67
if hasnames (rv)
54
68
# if data has no NA, convert to simple Vector
55
- return DictoVec (hasna ? DataArray (rv . data, nas) : rv . data , names (rv))
69
+ return DictoVec (jv , names (rv))
56
70
else
57
71
hasdims = hasdim (rv)
58
72
if ! hasdims && length (rv. data)== 1
59
73
# scalar
60
- # FIXME handle NAs
61
- # if hasna
62
- return rv. data[1 ]
74
+ return jv[1 ]
63
75
elseif ! hasdims
64
76
# vectors
65
- return hasna ? DataArray (rv . data, nas) : rv . data
77
+ return jv
66
78
else
67
79
# matrices and so on
68
- dims = tuple (convert (Vector{Int64}, getattr (rv, " dim" ))... )
69
- return hasna ? DataArray (reshape (rv. data, dims), reshape (nas, dims)) :
70
- reshape (rv. data, dims)
80
+ dims = tuple (convert (Vector{Int}, getattr (rv, " dim" ))... )
81
+ return reshape (jv, dims)
71
82
end
72
83
end
73
84
end
74
85
75
86
function sexp2julia (rl:: RList )
76
87
if isdataframe (rl)
77
- # FIXME remove Any type assertion workaround
78
- DataFrame (Any[data (col) for col in rl. data], map (identifier, names (rl)))
88
+ # FIXME forceNullable option to always convert to NullableArray
89
+ DataFrame (Any[julia_vector (col, true ) for col in rl. data], map (identifier, names (rl)))
79
90
elseif hasnames (rl)
80
91
DictoVec (Any[sexp2julia (item) for item in rl. data], names (rl))
81
92
else
0 commit comments