-
Notifications
You must be signed in to change notification settings - Fork 11
WIP: Consolidate hcat, append, merge, vcat and split functional overlaps #70
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,8 @@ The following are normally implemented for AbstractDataTables: | |
|
||
* [`describe`](@ref) : summarize columns | ||
* [`dump`](@ref) : show structure | ||
* `hcat` : horizontal concatenation | ||
* `merge` : horizontal concatenation | ||
* `merge!` : horizontal concatenation, modifies first argument in-place | ||
* `vcat` : vertical concatenation | ||
* `names` : columns names | ||
* [`names!`](@ref) : set columns names | ||
|
@@ -649,22 +650,29 @@ without(dt::AbstractDataTable, c::Any) = without(dt, index(dt)[c]) | |
|
||
############################################################################## | ||
## | ||
## Hcat / vcat | ||
## merge/merge!/append/append!/vcat | ||
## | ||
############################################################################## | ||
|
||
# hcat's first argument must be an AbstractDataTable | ||
# Trailing arguments (currently) may also be NullableVectors, Vectors, or scalars. | ||
|
||
# hcat! is defined in datatables/datatables.jl | ||
# Its first argument (currently) must be a DataTable. | ||
function Base.merge!(dt::AbstractDataTable, others::AbstractDataTable...) | ||
for other in others | ||
for (i, c) in enumerate(add_names(names(dt), names(other))) | ||
dt[c] = other[i] | ||
end | ||
end | ||
return dt | ||
end | ||
|
||
# catch-all to cover cases where indexing returns a DataTable and copy doesn't | ||
Base.hcat(dt::AbstractDataTable, x) = hcat!(dt[:, :], x) | ||
Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable) = hcat!(dt1[:, :], dt2) | ||
Base.merge(dt::AbstractDataTable, dtn::AbstractDataTable...) = merge!(dt[:, :], dtn...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
function Base.append!(dt1::AbstractDataTable, x::AbstractVector) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could use a one-liner here and below. |
||
merge!(dt1, DataTable(Any[x])) | ||
end | ||
|
||
Base.hcat(dt::AbstractDataTable, x, y...) = hcat!(hcat(dt, x), y...) | ||
Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...) | ||
function append(dt1::AbstractDataTable, x::AbstractVector) | ||
merge(dt1, DataTable(Any[x])) | ||
end | ||
|
||
@generated function promote_col_type(cols::AbstractVector...) | ||
elty = Base.promote_eltype(cols...) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -201,7 +201,11 @@ function combine(ga::GroupApplied) | |
idx[j + (1:n)] = gd.idx[start] | ||
j += n | ||
end | ||
hcat!(gd.parent[idx, gd.cols], valscat) | ||
if isa(valscat, DataTable) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I broke this during my changes it helped me understand how There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really understand. |
||
return merge!(gd.parent[idx, gd.cols], valscat) | ||
else | ||
return append!(gd.parent[idx, gd.cols], valscat) | ||
end | ||
end | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,7 @@ module TestDataTable | |
dt = DataTable(a=[1, 2], b=[3., 4.]) | ||
dt2 = DataTable(b=["a", "b"], c=[:c, :d]) | ||
@test isequal(merge!(dt, dt2), dt) | ||
@test isequal(dt, DataTable(a=[1, 2], b=["a", "b"], c=[:c, :d])) | ||
@test isequal(dt, DataTable(a=[1, 2], b=[3., 4.], b_1=["a", "b"], c=[:c, :d])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was another point of confusion. Depending on which method you used to horizontally concatenate, you could either have columns overwrite one another or have column names updated so duplicates are unique. Is everyone ok with defaulting to not overwriting column names? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The generic Maybe that's an argument to keep |
||
|
||
#test_group("Empty DataTable constructors") | ||
dt = DataTable(Nullable{Int}, 10, 3) | ||
|
@@ -322,7 +322,7 @@ module TestDataTable | |
dt = DataTable(A = 1:10, B = 'A':'J') | ||
@test !(dt[:,:] === dt) | ||
|
||
@test append!(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) | ||
@test vcat(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) | ||
dt = DataTable(A = NullableArray(1:3), B = NullableArray(4:6)) | ||
@test all(c -> isa(c, NullableArray), categorical!(deepcopy(dt)).columns) | ||
@test all(c -> isa(c, NullableCategoricalArray), categorical!(deepcopy(dt), [1,2]).columns) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I can see the point of implementing generic functions which exist in Base, I don't like the idea of creating
append
, with such a general name and so similar toappend!
. Especially sincemerge
does the same thing.