@@ -175,10 +175,10 @@ unstack(df::AbstractDataFrame)
175
175
176
176
* `::DataFrame` : the wide-format DataFrame
177
177
178
- If `colkey` contains `missing` values then they will be skipped and warning will be printed.
178
+ If `colkey` contains `missing` values then they will be skipped and a warning will be printed.
179
179
180
180
If a combination of `rowkeys` and `colkey` contains duplicate entries then the last `value` will
181
- be retained and warnign will be printed.
181
+ be retained and a warning will be printed.
182
182
183
183
### Examples
184
184
@@ -197,68 +197,66 @@ wide3 = unstack(long, [:id, :a], :variable, :value)
197
197
```
198
198
Note that there are some differences between the widened results above.
199
199
"""
200
function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
    # Row identifiers: a categorical copy of the `rowkey` column that also admits
    # `missing`, with unused levels dropped.
    rowdata = df[rowkey]
    rowcat = copy(CategoricalArray{Union{eltype(rowdata), Missing}}(rowdata))
    droplevels!(rowcat)

    # Column identifiers: the `colkey` column viewed as a categorical array.
    coldata = df[colkey]
    colcat = CategoricalArray{Union{eltype(coldata), Missing}}(coldata)

    vals = df[value]
    nrows = length(rowcat.pool)
    ncols = length(colcat.pool)

    # One output column per entry of the column-key pool, each with `nrows` slots
    # allocated by `similar_missing`.
    widecols = [similar_missing(vals, nrows) for _ in 1:ncols]

    return _unstack(df, rowkey, colkey, value, colcat, vals, widecols, rowcat)
end
210
210
211
function _unstack(df::AbstractDataFrame, rowkey::Int,
                  colkey::Int, value::Int, keycol, valuecol, df2m, refkeycol)
    # Fill the preallocated wide columns `df2m` from the long-format data.
    #
    # `keycol` / `refkeycol` are the categorical versions of the `colkey` / `rowkey`
    # columns, `valuecol` is the column of values to spread, and `df2m` holds one
    # output vector per column-key level. Rows whose `colkey` is missing are skipped
    # (with a single warning); rows whose `rowkey` is missing are collected into one
    # extra row appended at the end. The result is a new DataFrame whose first
    # column contains the row keys.
    Nrow = length(refkeycol.pool)
    Ncol = length(keycol.pool)
    hadmissing = false # have we encountered missing in refkeycol
    # has a given [col, row] entry been filled? The extra row is reserved for the
    # row appended when a missing `rowkey` is found (assumes each `df2m[j]` starts
    # with `Nrow` elements, as the caller visible in this file allocates them).
    mask_filled = falses(Ncol, Nrow + 1)
    warned_dup = false # has the duplicate entries warning already been printed?
    warned_missing = false # has the missing-in-colkey warning already been printed?
    keycol_order = Vector{Int}(CategoricalArrays.order(keycol.pool))
    refkeycol_order = Vector{Int}(CategoricalArrays.order(refkeycol.pool))
    for k in 1:nrow(df)
        kref = keycol.refs[k]
        if kref <= 0 # we have found missing in colkey
            if !warned_missing
                warn("Missing value in '$(_names(df)[colkey])' variable at row $k. Skipping.")
                warned_missing = true
            end
            continue # skip processing it
        end
        j = keycol_order[kref]
        refkref = refkeycol.refs[k]
        if refkref <= 0 # we have found missing in rowkey
            if !hadmissing # if it is the first time we have to add a new row
                hadmissing = true
                # we use the fact that missing is greater than anything
                for widecol in df2m
                    push!(widecol, missing)
                end
            end
            i = length(df2m[1])
        else
            i = refkeycol_order[refkref]
        end
        if !warned_dup && mask_filled[j, i]
            warn("Duplicate entries in unstack at row $k.")
            warned_dup = true
        end
        df2m[j][i] = valuecol[k] # the last value seen for a given cell wins
        mask_filled[j, i] = true
    end
    levs = levels(refkeycol)
    # we have to handle a case with missings in refkeycol as levs will skip missing
    col = similar_missing(df[rowkey], length(levs))
    # Copy the levels into the freshly allocated `col`; the destination must be
    # `col` itself, since no other buffer is allocated in this function.
    copy!(col, levs)
    if hadmissing
        push!(col, missing)
    end
    df2 = DataFrame(df2m, map(Symbol, levels(keycol)))
    return insert!(df2, 1, col, _names(df)[rowkey])
end
263
261
264
262
unstack (df:: AbstractDataFrame , rowkey:: ColumnIndex ,
@@ -276,7 +274,7 @@ unstack(df::AbstractDataFrame, colkey::Int, value::Int) =
276
274
function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex, value::ColumnIndex)
    # Look both column references up in the frame's index, then forward to the
    # method that receives the looked-up values.
    colpos = index(df)[colkey]
    valpos = index(df)[value]
    return unstack(df, rowkeys, colpos, valpos)
end
278
276
279
function unstack(df::AbstractDataFrame, rowkeys::AbstractVector{<:Real},
                 colkey::Int, value::Int)
    # Translate the positional row keys into the corresponding entries of
    # `names(df)` and forward the call with those names.
    rowkeynames = names(df)[rowkeys]
    return unstack(df, rowkeynames, colkey, value)
end
281
279
282
280
function unstack (df:: AbstractDataFrame , rowkeys:: AbstractVector{Symbol} , colkey:: Int , value:: Int )
@@ -302,23 +300,23 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
302
300
Nrow = length (g)
303
301
Ncol = length (levels (keycol))
304
302
mask_filled = falses (Ncol, Nrow)
305
- nowarning = true
306
- nowarning2 = true
307
- keycol_pool = Vector {Int} (CategoricalArrays. order (keycol. pool))
303
+ warned_missing_1 = false
304
+ warned_missing_2 = false
305
+ keycol_order = Vector {Int} (CategoricalArrays. order (keycol. pool))
308
306
for k in 1 : nrow (df)
309
- keycol_refs = keycol. refs[k]
310
- if keycol_refs = = 0
311
- if nowarning2
312
- warn (" Missing value in colkey variable at row $k . Skipping." )
313
- nowarning2 = false
307
+ kref = keycol. refs[k]
308
+ if kref < = 0
309
+ if ! warned_missing_2
310
+ warn (" Missing value in ' $( _names (df)[ colkey]) ' variable at row $k . Skipping." )
311
+ warned_missing_2 = true
314
312
end
315
313
continue
316
314
end
317
- j = keycol_pool[keycol_refs ]
315
+ j = keycol_order[kref ]
318
316
i = rowkey[k]
319
- if nowarning && mask_filled[j, i]
317
+ if warned_missing_1 && mask_filled[j, i]
320
318
warn (" Duplicate entries in unstack at row $k ." )
321
- nowarning = false
319
+ warned_missing_1 = true
322
320
end
323
321
df2m[j][i] = valuecol[k]
324
322
mask_filled[j, i] = true
0 commit comments