@@ -152,7 +152,7 @@ Unstacks a DataFrame; convert from a long to wide format
152
152
```julia
153
153
unstack(df::AbstractDataFrame, rowkeys::Union{Symbol, Integer},
154
154
colkey::Union{Symbol, Integer}, value::Union{Symbol, Integer})
155
- unstack(df::AbstractDataFrame, rowkeys::Union{ AbstractVector{<:Union{Symbol, Integer} }},
155
+ unstack(df::AbstractDataFrame, rowkeys::AbstractVector{<:Union{Symbol, Integer}},
156
156
colkey::Union{Symbol, Integer}, value::Union{Symbol, Integer})
157
157
unstack(df::AbstractDataFrame, colkey::Union{Symbol, Integer},
158
158
value::Union{Symbol, Integer})
@@ -198,9 +198,9 @@ wide3 = unstack(long, [:id, :a], :variable, :value)
198
198
Note that there are some differences between the widened results above.
199
199
"""
200
200
function unstack (df:: AbstractDataFrame , rowkey:: Int , colkey:: Int , value:: Int )
201
- refkeycol = deepcopy (categorical (df[rowkey]))
201
+ refkeycol = deepcopy (categorical (df[rowkey])) # TODO : remove deepcopy after CategoricalArrays #110 is merged
202
202
droplevels! (refkeycol)
203
- keycol = deepcopy (categorical (df[colkey]))
203
+ keycol = deepcopy (categorical (df[colkey])) # TODO : remove deepcopy after CategoricalArrays #110 is merged
204
204
droplevels! (keycol)
205
205
valuecol = df[value]
206
206
_unstack (df, rowkey, colkey, value, keycol, valuecol, refkeycol)
@@ -212,9 +212,9 @@ function _unstack(df::AbstractDataFrame, rowkey::Int,
212
212
Ncol = length (keycol. pool)
213
213
unstacked_val = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
214
214
hadmissing = false # have we encountered missing in refkeycol
215
- mask_filled = falses (Ncol, Nrow+ 1 ) # has a given [col,row] entry been filled?
216
- warned_dup = false # hawe we already printed duplicate entries warning?
217
- warned_missing = false # hawe we already printed missing in keycol warning?
215
+ mask_filled = falses (Nrow+ 1 , Ncol ) # has a given [row,col] entry been filled?
216
+ warned_dup = false # have we already printed duplicate entries warning?
217
+ warned_missing = false # have we already printed missing in keycol warning?
218
218
keycol_order = Vector {Int} (CategoricalArrays. order (keycol. pool))
219
219
refkeycol_order = Vector {Int} (CategoricalArrays. order (refkeycol. pool))
220
220
for k in 1 : nrow (df)
@@ -240,13 +240,13 @@ function _unstack(df::AbstractDataFrame, rowkey::Int,
240
240
else
241
241
i = refkeycol_order[refkref]
242
242
end
243
- if ! warned_dup && mask_filled[j, i ]
243
+ if ! warned_dup && mask_filled[i, j ]
244
244
warn (" Duplicate entries in unstack at row $k for key " *
245
245
" $(refkeycol[k]) and variable $(keycol[k]) ." )
246
246
warned_dup = true
247
247
end
248
248
unstacked_val[j][i] = valuecol[k]
249
- mask_filled[j, i ] = true
249
+ mask_filled[i, j ] = true
250
250
end
251
251
levs = levels (refkeycol)
252
252
# we have to handle a case with missings in refkeycol as levs will skip missing
@@ -279,7 +279,7 @@ function unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol}, colkey:
279
279
length (rowkeys) == 0 && throw (ArgumentError (" No key column found" ))
280
280
length (rowkeys) == 1 && return unstack (df, rowkeys[1 ], colkey, value)
281
281
g = groupby (df, rowkeys, sort= true )
282
- keycol = deepcopy (categorical (df[colkey]))
282
+ keycol = deepcopy (categorical (df[colkey])) # TODO : remove deepcopy after CategoricalArrays #110 is merged
283
283
droplevels! (keycol)
284
284
valuecol = df[value]
285
285
_unstack (df, rowkeys, colkey, value, keycol, valuecol, g)
@@ -296,7 +296,7 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
296
296
Nrow = length (g)
297
297
Ncol = length (levels (keycol))
298
298
unstacked_val = [similar_missing (valuecol, Nrow) for i in 1 : Ncol]
299
- mask_filled = falses (Ncol, Nrow )
299
+ mask_filled = falses (Nrow, Ncol )
300
300
warned_dup = false
301
301
warned_missing = false
302
302
keycol_order = Vector {Int} (CategoricalArrays. order (keycol. pool))
@@ -311,13 +311,13 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
311
311
end
312
312
j = keycol_order[kref]
313
313
i = rowkey[k]
314
- if ! warned_dup && mask_filled[j, i ]
314
+ if ! warned_dup && mask_filled[i, j ]
315
315
warn (" Duplicate entries in unstack at row $k for key " *
316
316
" $(tuple ((df[1 ,s] for s in rowkeys). .. )) and variable $(keycol[k]) ." )
317
317
warned_dup = true
318
318
end
319
319
unstacked_val[j][i] = valuecol[k]
320
- mask_filled[j, i ] = true
320
+ mask_filled[i, j ] = true
321
321
end
322
322
df2 = DataFrame (unstacked_val, map (Symbol, levels (keycol)))
323
323
hcat (df1, df2)
0 commit comments