@@ -9,13 +9,10 @@ similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}})
9
9
similar_nullable {T<:Nullable} (dv:: AbstractArray{T} , dims:: Union{Int, Tuple{Vararg{Int}}} ) =
10
10
NullableArray {eltype(T)} (dims)
11
11
12
- similar_nullable {T,R } (dv:: CategoricalArray{T,R } , dims:: Union{Int, Tuple{Vararg{Int}}} ) =
12
+ similar_nullable {T} (dv:: CategoricalArray{T} , dims:: Union{Int, Tuple{Vararg{Int}}} ) =
13
13
NullableCategoricalArray {T} (dims)
14
14
15
- similar_nullable (dt:: AbstractDataTable , dims:: Int ) =
16
- DataTable (Any[similar_nullable (x, dims) for x in columns (dt)], copy (index (dt)))
17
-
18
- similar_nullable {T,R} (dv:: NullableCategoricalArray{T,R} , dims:: Union{Int, Tuple{Vararg{Int}}} ) =
15
+ similar_nullable {T} (dv:: NullableCategoricalArray{T} , dims:: Union{Int, Tuple{Vararg{Int}}} ) =
19
16
NullableCategoricalArray {T} (dims)
20
17
21
18
# helper structure for DataTables joining
@@ -47,55 +44,74 @@ Base.length(x::RowIndexMap) = length(x.orig)
47
44
48
45
# composes the joined data table using the maps between the left and right
49
46
# table rows and the indices of rows in the result
50
- function compose_joined_table (joiner:: DataTableJoiner ,
47
+ function compose_joined_table (joiner:: DataTableJoiner , kind :: Symbol ,
51
48
left_ixs:: RowIndexMap , leftonly_ixs:: RowIndexMap ,
52
49
right_ixs:: RowIndexMap , rightonly_ixs:: RowIndexMap )
53
50
@assert length (left_ixs) == length (right_ixs)
54
51
# compose left half of the result taking all left columns
55
52
all_orig_left_ixs = vcat (left_ixs. orig, leftonly_ixs. orig)
56
- if length (leftonly_ixs) > 0
53
+
54
+ ril = length (right_ixs)
55
+ lil = length (left_ixs)
56
+ loil = length (leftonly_ixs)
57
+ roil = length (rightonly_ixs)
58
+
59
+ if loil > 0
57
60
# combine the matched (left_ixs.orig) and non-matched (leftonly_ixs.orig) indices of the left table rows
58
61
# preserving the original rows order
59
- all_orig_left_ixs = similar (left_ixs. orig, length (left_ixs) + length (leftonly_ixs) )
62
+ all_orig_left_ixs = similar (left_ixs. orig, lil + loil )
60
63
@inbounds all_orig_left_ixs[left_ixs. join] = left_ixs. orig
61
64
@inbounds all_orig_left_ixs[leftonly_ixs. join] = leftonly_ixs. orig
62
65
else
63
66
# the result contains only the left rows that are matched to right rows (left_ixs)
64
67
all_orig_left_ixs = left_ixs. orig # no need to copy left_ixs.orig as it's not used elsewhere
65
68
end
66
- ril = length (right_ixs)
67
- loil = length (leftonly_ixs)
68
- roil = length (rightonly_ixs)
69
- left_dt = DataTable (Any[resize! (col[all_orig_left_ixs], length (all_orig_left_ixs)+ roil)
70
- for col in columns (joiner. dtl)],
71
- names (joiner. dtl))
72
-
73
- # compose right half of the result taking all right columns excluding on
74
- dtr_noon = without (joiner. dtr, joiner. on_cols)
75
69
# permutation to swap rightonly and leftonly rows
76
70
right_perm = vcat (1 : ril, ril+ roil+ 1 : ril+ roil+ loil, ril+ 1 : ril+ roil)
77
71
if length (leftonly_ixs) > 0
78
72
# compose right_perm with the permutation that restores left rows order
79
73
right_perm[vcat (right_ixs. join, leftonly_ixs. join)] = right_perm[1 : ril+ loil]
80
74
end
81
75
all_orig_right_ixs = vcat (right_ixs. orig, rightonly_ixs. orig)
82
- right_dt = DataTable (Any[resize! (col[all_orig_right_ixs], length (all_orig_right_ixs)+ loil)[right_perm]
83
- for col in columns (dtr_noon)],
84
- names (dtr_noon))
85
- # merge left and right parts of the joined table
86
- res = hcat! (left_dt, right_dt)
76
+
77
+ # compose right half of the result taking all right columns excluding on
78
+ dtr_noon = without (joiner. dtr, joiner. on_cols)
79
+
80
+ nrow = length (all_orig_left_ixs) + roil
81
+ @assert nrow == length (all_orig_right_ixs) + loil
82
+ ncleft = ncol (joiner. dtl)
83
+ cols = Vector {Any} (ncleft + ncol (dtr_noon))
84
+ _similar = kind == :inner ? similar : similar_nullable
85
+ for (i, col) in enumerate (columns (joiner. dtl))
86
+ cols[i] = _similar (col, nrow)
87
+ fillcolumn! (cols[i], col, all_orig_left_ixs)
88
+ end
89
+ for (i, col) in enumerate (columns (dtr_noon))
90
+ cols[i+ ncleft] = _similar (col, nrow)
91
+ fillcolumn! (cols[i+ ncleft], col, all_orig_right_ixs)
92
+ permute! (cols[i+ ncleft], right_perm)
93
+ end
94
+ res = DataTable (cols, vcat (names (joiner. dtl), names (dtr_noon)))
87
95
88
96
if length (rightonly_ixs. join) > 0
89
97
# some left rows are nulls, so the values of the "on" columns
90
98
# need to be taken from the right
91
99
for (on_col_ix, on_col) in enumerate (joiner. on_cols)
92
100
# fix the result of the rightjoin by taking the nonnull values from the right table
93
- res[on_col][rightonly_ixs. join] = joiner. dtr_on[rightonly_ixs. orig, on_col_ix]
101
+ offset = nrow - length (rightonly_ixs. orig)
102
+ fillcolumn! (res[on_col], joiner. dtr_on[on_col_ix], rightonly_ixs. orig, offset)
94
103
end
95
104
end
96
105
return res
97
106
end
98
107
108
+ function fillcolumn! {T1, T2} (dtcol:: AbstractVector{T1} , refcol:: AbstractVector{T2} ,
109
+ indices:: Vector{Int} , offset:: Int = 0 )
110
+ @inbounds for (j, k) in enumerate (indices)
111
+ dtcol[j+ offset] = refcol[k]
112
+ end
113
+ end
114
+
99
115
# map the indices of the left and right joined tables
100
116
# to the indices of the rows in the resulting table
101
117
# if `nothing` is given, the corresponding map is not built
@@ -210,7 +226,8 @@ join(dt1::AbstractDataTable,
210
226
- `:cross` : a full Cartesian product of the key combinations; every
211
227
row of `dt1` is matched with every row of `dt2`
212
228
213
- Null values are filled in where needed to complete joins.
229
+ For the three join operations that may introduce missing values (`:outer`, `:left`,
230
+ and `:right`), all columns of the returned data table will be nullable.
214
231
215
232
### Result
216
233
@@ -246,22 +263,21 @@ function Base.join(dt1::AbstractDataTable,
246
263
joiner = DataTableJoiner (dt1, dt2, on)
247
264
248
265
if kind == :inner
249
- compose_joined_table (joiner, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
250
- group_rows (joiner. dtr_on),
251
- true , false , true , false )... )
266
+ compose_joined_table (joiner, kind, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
267
+ group_rows (joiner. dtr_on),
268
+ true , false , true , false )... )
252
269
elseif kind == :left
253
- compose_joined_table (joiner, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
254
- group_rows (joiner. dtr_on),
255
- true , true , true , false )... )
270
+ compose_joined_table (joiner, kind, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
271
+ group_rows (joiner. dtr_on),
272
+ true , true , true , false )... )
256
273
elseif kind == :right
257
- right_ixs, rightonly_ixs, left_ixs, leftonly_ixs = update_row_maps! (joiner. dtr_on, joiner. dtl_on,
258
- group_rows (joiner. dtl_on),
259
- true , true , true , false )
260
- compose_joined_table (joiner, left_ixs, leftonly_ixs, right_ixs, rightonly_ixs)
274
+ compose_joined_table (joiner, kind, update_row_maps! (joiner. dtr_on, joiner. dtl_on,
275
+ group_rows (joiner. dtl_on),
276
+ true , true , true , false )[[3 , 4 , 1 , 2 ]]. .. )
261
277
elseif kind == :outer
262
- compose_joined_table (joiner, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
263
- group_rows (joiner. dtr_on),
264
- true , true , true , true )... )
278
+ compose_joined_table (joiner, kind, update_row_maps! (joiner. dtl_on, joiner. dtr_on,
279
+ group_rows (joiner. dtr_on),
280
+ true , true , true , true )... )
265
281
elseif kind == :semi
266
282
# hash the right rows
267
283
dtr_on_grp = group_rows (joiner. dtr_on)
0 commit comments