Skip to content

Commit 285a028

Browse files
committed
moved unstacked_val into _unstack
1 parent 7de450d commit 285a028

File tree

2 files changed

+19
-20
lines changed

2 files changed

+19
-20
lines changed

src/abstractdataframe/reshape.jl

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -201,17 +201,16 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
201201
refkeycol = CategoricalArray{Union{eltype(df[rowkey]), Missing}}(df[rowkey])
202202
droplevels!(refkeycol)
203203
keycol = CategoricalArray{Union{eltype(df[colkey]), Missing}}(df[colkey])
204+
droplevels!(keycol)
204205
valuecol = df[value]
205-
Nrow = length(refkeycol.pool)
206-
Ncol = length(keycol.pool)
207-
df2m = [similar_missing(valuecol, Nrow) for i in 1:Ncol]
208-
_unstack(df, rowkey, colkey, value, keycol, valuecol, df2m, refkeycol)
206+
_unstack(df, rowkey, colkey, value, keycol, valuecol, refkeycol)
209207
end
210208

211209
function _unstack(df::AbstractDataFrame, rowkey::Int,
212-
colkey::Int, value::Int, keycol, valuecol, df2m, refkeycol)
210+
colkey::Int, value::Int, keycol, valuecol, refkeycol)
213211
Nrow = length(refkeycol.pool)
214212
Ncol = length(keycol.pool)
213+
unstacked_val = [similar_missing(valuecol, Nrow) for i in 1:Ncol]
215214
hadmissing = false # have we encountered missing in refkeycol
216215
mask_filled = falses(Ncol, Nrow+1) # has a given [col,row] entry been filled?
217216
warned_dup = false # have we already printed duplicate entries warning?
@@ -233,28 +232,28 @@ function _unstack(df::AbstractDataFrame, rowkey::Int,
233232
if !hadmissing # if it is the first time we have to add a new row
234233
hadmissing = true
235234
# we use the fact that missing is greater than anything
236-
for i in eachindex(df2m)
237-
push!(df2m[i], missing)
235+
for i in eachindex(unstacked_val)
236+
push!(unstacked_val[i], missing)
238237
end
239238
end
240-
i = length(df2m[1])
239+
i = length(unstacked_val[1])
241240
else
242241
i = refkeycol_order[refkref]
243242
end
244-
if (!warned_dup) && mask_filled[j, i]
243+
if !warned_dup && mask_filled[j, i]
245244
warn("Duplicate entries in unstack at row $k for key "*
246245
"$(refkeycol[k]) and variable $(keycol[k]).")
247246
warned_dup = true
248247
end
249-
df2m[j][i] = valuecol[k]
248+
unstacked_val[j][i] = valuecol[k]
250249
mask_filled[j, i] = true
251250
end
252251
levs = levels(refkeycol)
253252
# we have to handle a case with missings in refkeycol as levs will skip missing
254253
col = similar_missing(df[rowkey], length(levs) + hadmissing)
255254
copy!(col, levs)
256255
hadmissing && (col[end] = missing)
257-
df2 = DataFrame(df2m, map(Symbol, levels(keycol)))
256+
df2 = DataFrame(unstacked_val, map(Symbol, levels(keycol)))
258257
insert!(df2, 1, col, _names(df)[rowkey])
259258
end
260259

@@ -281,15 +280,13 @@ function unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol}, colkey:
281280
length(rowkeys) == 1 && return unstack(df, rowkeys[1], colkey, value)
282281
g = groupby(df, rowkeys, sort=true)
283282
keycol = CategoricalArray{Union{eltype(df[colkey]), Missing}}(df[colkey])
283+
droplevels!(keycol)
284284
valuecol = df[value]
285-
Nrow = length(g)
286-
Ncol = length(levels(keycol))
287-
df2m = [similar_missing(valuecol, Nrow) for i in 1:Ncol]
288-
_unstack(df, rowkeys, colkey, value, keycol, valuecol, df2m, g)
285+
_unstack(df, rowkeys, colkey, value, keycol, valuecol, g)
289286
end
290287

291288
function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
292-
colkey::Int, value::Int, keycol, valuecol, df2m, g)
289+
colkey::Int, value::Int, keycol, valuecol, g)
293290
groupidxs = [g.idx[g.starts[i]:g.ends[i]] for i in 1:length(g.starts)]
294291
rowkey = zeros(Int, size(df, 1))
295292
for i in 1:length(groupidxs)
@@ -298,6 +295,7 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
298295
df1 = allowmissing!(df[g.idx[g.starts], g.cols], g.cols)
299296
Nrow = length(g)
300297
Ncol = length(levels(keycol))
298+
unstacked_val = [similar_missing(valuecol, Nrow) for i in 1:Ncol]
301299
mask_filled = falses(Ncol, Nrow)
302300
warned_dup = false
303301
warned_missing = false
@@ -313,15 +311,15 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Symbol},
313311
end
314312
j = keycol_order[kref]
315313
i = rowkey[k]
316-
if (!warned_dup) && mask_filled[j, i]
314+
if !warned_dup && mask_filled[j, i]
317315
warn("Duplicate entries in unstack at row $k for key "*
318316
"$(tuple((df[1,s] for s in rowkeys)...)) and variable $(keycol[k]).")
319317
warned_dup = true
320318
end
321-
df2m[j][i] = valuecol[k]
319+
unstacked_val[j][i] = valuecol[k]
322320
mask_filled[j, i] = true
323321
end
324-
df2 = DataFrame(df2m, map(Symbol, levels(keycol)))
322+
df2 = DataFrame(unstacked_val, map(Symbol, levels(keycol)))
325323
hcat(df1, df2)
326324
end
327325

test/dataframe.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,11 @@ module TestDataFrame
296296

297297
#Check the output of unstack
298298
df = DataFrame(Fish = CategoricalArray{Union{String, Missing}}(["Bob", "Bob", "Batman", "Batman"]),
299-
Key = Union{String, Missing}["Mass", "Color", "Mass", "Color"],
299+
Key = CategoricalArray{Union{String, Missing}}["Mass", "Color", "Mass", "Color"],
300300
Value = Union{String, Missing}["12 g", "Red", "18 g", "Grey"])
301301
# Check that reordering levels does not confuse unstack
302302
levels!(df[1], ["XXX", "Bob", "Batman"])
303+
levels!(df[2], ["YYY", "Color", "Mass"])
303304
#Unstack specifying a row column
304305
df2 = unstack(df, :Fish, :Key, :Value)
305306
@test levels(df[1]) == ["XXX", "Bob", "Batman"] # make sure we did not mess df[1] levels

0 commit comments

Comments
 (0)