@@ -37,6 +37,7 @@ groupby(cols)
37
37
38
38
* `d` : an AbstractDataFrame
39
39
* `cols` : data frame columns to group by
40
+ * `sort` : whether to sort the row groups by the values of the grouping columns `cols` (default `false`, i.e. no sorting)
40
41
41
42
If `d` is not provided, a curried version of groupby is given.
42
43
@@ -81,20 +82,24 @@ df |> groupby([:a, :b]) |> [sum, length]
81
82
```
82
83
83
84
"""
84
- function groupby {T} (df:: AbstractDataFrame , cols:: Vector{T} )
85
+ function groupby {T} (df:: AbstractDataFrame , cols:: Vector{T} ; sort :: Bool = false )
85
86
sdf = df[cols]
86
87
df_groups = _group_rows (sdf)
87
88
# sort the groups
88
- group_perm = sortperm (sub (sdf, df_groups. rperm[df_groups. starts]))
89
+ if sort
90
+ group_perm = sortperm (sub (sdf, df_groups. rperm[df_groups. starts]))
91
+ permute! (df_groups. starts, group_perm)
92
+ permute! (df_groups. stops, group_perm)
93
+ end
89
94
GroupedDataFrame (df, cols, df_groups. rperm,
90
- df_groups. starts[group_perm] ,
91
- df_groups. stops[group_perm] )
95
+ df_groups. starts,
96
+ df_groups. stops)
92
97
end
93
- groupby (d:: AbstractDataFrame , cols) = groupby (d, [cols])
98
+ groupby (d:: AbstractDataFrame , cols; sort :: Bool = false ) = groupby (d, [cols], sort = sort )
94
99
95
100
# add a function curry
96
- groupby {T} (cols:: Vector{T} ) = x -> groupby (x, cols)
97
- groupby (cols) = x -> groupby (x, cols)
101
+ groupby {T} (cols:: Vector{T} ; sort :: Bool = false ) = x -> groupby (x, cols, sort = sort )
102
+ groupby (cols; sort :: Bool = false ) = x -> groupby (x, cols, sort = sort )
98
103
99
104
Base. start (gd:: GroupedDataFrame ) = 1
100
105
Base. next (gd:: GroupedDataFrame , state:: Int ) =
@@ -241,8 +246,8 @@ Split-apply-combine in one step; apply `f` to each grouping in `d`
241
246
based on columns `cols`
242
247
243
248
```julia
244
- by(d::AbstractDataFrame, cols, f::Function)
245
- by(f::Function, d::AbstractDataFrame, cols)
249
+ by(d::AbstractDataFrame, cols, f::Function; sort::Bool = false )
250
+ by(f::Function, d::AbstractDataFrame, cols; sort::Bool = false )
246
251
```
247
252
248
253
### Arguments
@@ -251,6 +256,7 @@ by(f::Function, d::AbstractDataFrame, cols)
251
256
* `cols` : a column indicator (Symbol, Int, Vector{Symbol}, etc.)
252
257
* `f` : a function to be applied to groups; expects each argument to
253
258
be an AbstractDataFrame
259
+ * `sort` : whether to sort the row groups by the values of the grouping columns `cols` (default `false`, i.e. no sorting)
254
260
255
261
`f` can return a value, a vector, or a DataFrame. For a value or
256
262
vector, these are merged into a column along with the `cols` keys. For
281
287
```
282
288
283
289
"""
284
- by (d:: AbstractDataFrame , cols, f:: Function ) = combine (map (f, groupby (d, cols)))
285
- by (f:: Function , d:: AbstractDataFrame , cols) = by (d, cols, f)
290
+ by (d:: AbstractDataFrame , cols, f:: Function ; sort:: Bool = false ) =
291
+ combine (map (f, groupby (d, cols, sort = sort)))
292
+ by (f:: Function , d:: AbstractDataFrame , cols; sort:: Bool = false ) =
293
+ by (d, cols, f, sort = sort)
286
294
287
295
#
288
296
# Aggregate convenience functions
@@ -342,8 +350,9 @@ Base.(:|>)(gd::GroupedDataFrame, fs::Vector{Function}) = aggregate(gd, fs)
342
350
# Groups DataFrame by cols before applying aggregate
343
351
function aggregate {T <: ColumnIndex} (d:: AbstractDataFrame ,
344
352
cols: :@compat (Union{T, AbstractVector{T}}),
345
- fs: :@compat (Union{Function, Vector{Function}}))
346
- aggregate (groupby (d, cols), fs)
353
+ fs: :@compat (Union{Function, Vector{Function}});
354
+ sort:: Bool = false )
355
+ aggregate (groupby (d, cols, sort = sort), fs)
347
356
end
348
357
349
358
function _makeheaders (fs:: Vector{Function} , cn:: Vector{Symbol} )
0 commit comments