@@ -201,17 +201,16 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
201
201
refkeycol = CategoricalArray {Union{eltype(df[rowkey]), Missing}} (df[rowkey])
202
202
droplevels! (refkeycol)
203
203
keycol = CategoricalArray {Union{eltype(df[colkey]), Missing}} (df[colkey])
204
+ droplevels! (keycol)
204
205
valuecol = df[value]
205
- Nrow = length (refkeycol. pool)
206
- Ncol = length (keycol. pool)
207
- df2m = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
208
- _unstack (df, rowkey, colkey, value, keycol, valuecol, df2m, refkeycol)
206
+ _unstack (df, rowkey, colkey, value, keycol, valuecol, refkeycol)
209
207
end
210
208
211
209
function _unstack (df:: AbstractDataFrame , rowkey:: Int ,
212
- colkey:: Int , value:: Int , keycol, valuecol, df2m, refkeycol)
210
+ colkey:: Int , value:: Int , keycol, valuecol, refkeycol)
213
211
Nrow = length (refkeycol. pool)
214
212
Ncol = length (keycol. pool)
213
+ unstacked_val = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
215
214
hadmissing = false # have we encountered missing in refkeycol
216
215
mask_filled = falses (Ncol, Nrow+ 1 ) # has a given [col,row] entry been filled?
217
216
warned_dup = false # have we already printed duplicate entries warning?
@@ -233,28 +232,28 @@ function _unstack(df::AbstractDataFrame, rowkey::Int,
233
232
if ! hadmissing # if it is the first time we have to add a new row
234
233
hadmissing = true
235
234
# we use the fact that missing is greater than anything
236
- for i in eachindex (df2m )
237
- push! (df2m [i], missing )
235
+ for i in eachindex (unstacked_val )
236
+ push! (unstacked_val [i], missing )
238
237
end
239
238
end
240
- i = length (df2m [1 ])
239
+ i = length (unstacked_val [1 ])
241
240
else
242
241
i = refkeycol_order[refkref]
243
242
end
244
- if ( ! warned_dup) && mask_filled[j, i]
243
+ if ! warned_dup && mask_filled[j, i]
245
244
warn (" Duplicate entries in unstack at row $k for key " *
246
245
" $(refkeycol[k]) and variable $(keycol[k]) ." )
247
246
warned_dup = true
248
247
end
249
- df2m [j][i] = valuecol[k]
248
+ unstacked_val [j][i] = valuecol[k]
250
249
mask_filled[j, i] = true
251
250
end
252
251
levs = levels (refkeycol)
253
252
# we have to handle a case with missings in refkeycol as levs will skip missing
254
253
col = similar_missing (df[rowkey], length (levs) + hadmissing)
255
254
copy! (col, levs)
256
255
hadmissing && (col[end ] = missing )
257
- df2 = DataFrame (df2m , map (Symbol, levels (keycol)))
256
+ df2 = DataFrame (unstacked_val , map (Symbol, levels (keycol)))
258
257
insert! (df2, 1 , col, _names (df)[rowkey])
259
258
end
260
259
@@ -281,15 +280,13 @@ function unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol}, colkey:
281
280
length (rowkeys) == 1 && return unstack (df, rowkeys[1 ], colkey, value)
282
281
g = groupby (df, rowkeys, sort= true )
283
282
keycol = CategoricalArray {Union{eltype(df[colkey]), Missing}} (df[colkey])
283
+ droplevels! (keycol)
284
284
valuecol = df[value]
285
- Nrow = length (g)
286
- Ncol = length (levels (keycol))
287
- df2m = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
288
- _unstack (df, rowkeys, colkey, value, keycol, valuecol, df2m, g)
285
+ _unstack (df, rowkeys, colkey, value, keycol, valuecol, g)
289
286
end
290
287
291
288
function _unstack (df:: AbstractDataFrame , rowkeys:: AbstractVector{Symbol} ,
292
- colkey:: Int , value:: Int , keycol, valuecol, df2m, g)
289
+ colkey:: Int , value:: Int , keycol, valuecol, g)
293
290
groupidxs = [g. idx[g. starts[i]: g. ends[i]] for i in 1 : length (g. starts)]
294
291
rowkey = zeros (Int, size (df, 1 ))
295
292
for i in 1 : length (groupidxs)
@@ -298,6 +295,7 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
298
295
df1 = allowmissing! (df[g. idx[g. starts], g. cols], g. cols)
299
296
Nrow = length (g)
300
297
Ncol = length (levels (keycol))
298
+ unstacked_val = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
301
299
mask_filled = falses (Ncol, Nrow)
302
300
warned_dup = false
303
301
warned_missing = false
@@ -313,15 +311,15 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
313
311
end
314
312
j = keycol_order[kref]
315
313
i = rowkey[k]
316
- if ( ! warned_dup) && mask_filled[j, i]
314
+ if ! warned_dup && mask_filled[j, i]
317
315
warn (" Duplicate entries in unstack at row $k for key " *
318
316
" $(tuple ((df[1 ,s] for s in rowkeys). .. )) and variable $(keycol[k]) ." )
319
317
warned_dup = true
320
318
end
321
- df2m [j][i] = valuecol[k]
319
+ unstacked_val [j][i] = valuecol[k]
322
320
mask_filled[j, i] = true
323
321
end
324
- df2 = DataFrame (df2m , map (Symbol, levels (keycol)))
322
+ df2 = DataFrame (unstacked_val , map (Symbol, levels (keycol)))
325
323
hcat (df1, df2)
326
324
end
327
325
0 commit comments