From 676b01f92d04b8dc3615ec916d0e42cd80046be4 Mon Sep 17 00:00:00 2001 From: Cameron Prybol Date: Wed, 24 May 2017 18:13:12 -0700 Subject: [PATCH] Consolidate hcat, append, merge, vcat and split functional overlaps removes hcat, append now adds columns to end of DataTable, merge concatenates DataTables horizontally, and vcat still concatenates vertically. append! and vcat no longer perform the same function. hcat and merge no longer perform the same function. --- src/DataTables.jl | 2 + src/abstractdatatable/abstractdatatable.jl | 30 ++++++++----- src/abstractdatatable/reshape.jl | 2 +- src/datatable/datatable.jl | 49 +--------------------- src/groupeddatatable/grouping.jl | 6 ++- src/other/index.jl | 6 ++- test/cat.jl | 32 +++++++------- test/datatable.jl | 4 +- 8 files changed, 53 insertions(+), 78 deletions(-) diff --git a/src/DataTables.jl b/src/DataTables.jl index 334c0c0..46039bb 100644 --- a/src/DataTables.jl +++ b/src/DataTables.jl @@ -39,6 +39,7 @@ export @~, SubDataTable, aggregate, + append, by, categorical!, colwise, @@ -52,6 +53,7 @@ export @~, eachrow, eltypes, groupby, + merge, melt, meltdt, names!, diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl index e1e841e..1f3f8b2 100644 --- a/src/abstractdatatable/abstractdatatable.jl +++ b/src/abstractdatatable/abstractdatatable.jl @@ -13,7 +13,8 @@ The following are normally implemented for AbstractDataTables: * [`describe`](@ref) : summarize columns * [`dump`](@ref) : show structure -* `hcat` : horizontal concatenation +* `merge` : horizontal concatenation +* `merge!` : horizontal concatenation, modifies first argument in-place * `vcat` : vertical concatenation * `names` : columns names * [`names!`](@ref) : set columns names @@ -649,22 +650,29 @@ without(dt::AbstractDataTable, c::Any) = without(dt, index(dt)[c]) ############################################################################## ## -## Hcat / vcat +## merge/merge!/append/append!/vcat ## ############################################################################## -# hcat's first argument must be an AbstractDataTable -# Trailing arguments (currently) may also be NullableVectors, Vectors, or scalars. - -# hcat! is defined in datatables/datatables.jl -# Its first argument (currently) must be a DataTable. +function Base.merge!(dt::AbstractDataTable, others::AbstractDataTable...) + for other in others + for (i, c) in enumerate(add_names(names(dt), names(other))) + dt[c] = other[i] + end + end + return dt +end # catch-all to cover cases where indexing returns a DataTable and copy doesn't -Base.hcat(dt::AbstractDataTable, x) = hcat!(dt[:, :], x) -Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable) = hcat!(dt1[:, :], dt2) +Base.merge(dt::AbstractDataTable, dtn::AbstractDataTable...) = merge!(dt[:, :], dtn...) + +function Base.append!(dt1::AbstractDataTable, x::AbstractVector) + merge!(dt1, DataTable(Any[x])) +end -Base.hcat(dt::AbstractDataTable, x, y...) = hcat!(hcat(dt, x), y...) -Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...) +function append(dt1::AbstractDataTable, x::AbstractVector) + merge(dt1, DataTable(Any[x])) +end @generated function promote_col_type(cols::AbstractVector...) elty = Base.promote_eltype(cols...) diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl index cc6b97f..ca4fead 100644 --- a/src/abstractdatatable/reshape.jl +++ b/src/abstractdatatable/reshape.jl @@ -248,7 +248,7 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int) dt2[j][i] = valuecol[k] end end - hcat(dt1, dt2) + merge!(dt1, dt2) end unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value) diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl index 382d3f0..02f43cf 100644 --- a/src/datatable/datatable.jl +++ b/src/datatable/datatable.jl @@ -63,8 +63,8 @@ dt1[:, [1,3]] dt1[1:4, :] dt1[1:4, :C] dt1[1:4, :C] = 40. * dt1[1:4, :C] -[dt1; dt2] # vcat -[dt1 dt2] # hcat +vcat(dt1, dt2) +merge(dt1, dt2) size(dt1) ``` @@ -635,15 +635,6 @@ function Base.insert!(dt::DataTable, col_ind::Int, item, name::Symbol) insert!(dt, col_ind, upgrade_scalar(dt, item), name) end -function Base.merge!(dt::DataTable, others::AbstractDataTable...) - for other in others - for n in _names(other) - dt[n] = other[n] - end - end - return dt -end - ############################################################################## ## ## Copying @@ -717,31 +708,6 @@ function deleterows!(dt::DataTable, ind::AbstractVector{Int}) dt end -############################################################################## -## -## Hcat specialization -## -############################################################################## - -# hcat! for 2 arguments -function hcat!(dt1::DataTable, dt2::AbstractDataTable) - u = add_names(index(dt1), index(dt2)) - for i in 1:length(u) - dt1[u[i]] = dt2[i] - end - return dt1 -end -hcat!(dt::DataTable, x::AbstractVector) = hcat!(dt, DataTable(Any[x])) - -# hcat! for 1-n arguments -hcat!(dt::DataTable) = dt -hcat!(a::DataTable, b, c...) = hcat!(hcat!(a, b), c...) - -# hcat -Base.hcat(dt::DataTable, x) = hcat!(copy(dt), x) -Base.hcat(dt1::DataTable, dt2::AbstractDataTable) = hcat!(copy(dt1), dt2) -Base.hcat(dt1::DataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...) - ############################################################################## ## ## Nullability @@ -787,17 +753,6 @@ function categorical!(dt::DataTable, compact::Bool=true) dt end -function Base.append!(dt1::DataTable, dt2::AbstractDataTable) - _names(dt1) == _names(dt2) || error("Column names do not match") - eltypes(dt1) == eltypes(dt2) || error("Column eltypes do not match") - ncols = size(dt1, 2) - # TODO: This needs to be a sort of transaction to be 100% safe - for j in 1:ncols - append!(dt1[j], dt2[j]) - end - return dt1 -end - function Base.convert(::Type{DataTable}, A::AbstractMatrix) n = size(A, 2) cols = Vector{Any}(n) diff --git a/src/groupeddatatable/grouping.jl b/src/groupeddatatable/grouping.jl index 2472976..faacc53 100644 --- a/src/groupeddatatable/grouping.jl +++ b/src/groupeddatatable/grouping.jl @@ -201,7 +201,11 @@ function combine(ga::GroupApplied) idx[j + (1:n)] = gd.idx[start] j += n end - hcat!(gd.parent[idx, gd.cols], valscat) + if isa(valscat, DataTable) + return merge!(gd.parent[idx, gd.cols], valscat) + else + return append!(gd.parent[idx, gd.cols], valscat) + end end diff --git a/src/other/index.jl b/src/other/index.jl index 45ebaee..5842f6a 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -123,9 +123,11 @@ Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for # Helpers function add_names(ind::Index, add_ind::Index) - u = names(add_ind) + add_names(_names(ind), names(add_ind)) +end - seen = Set(_names(ind)) +function add_names(a::Vector{Symbol}, u::Vector{Symbol}) + seen = Set(a) dups = Int[] for i in 1:length(u) diff --git a/test/cat.jl b/test/cat.jl index 52c230a..da484d7 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -3,7 +3,7 @@ module TestCat using DataTables # - # hcat + # merge # nvint = NullableArray(Nullable{Int}[1, 2, Nullable(), 4]) @@ -14,35 +14,39 @@ module TestCat dt4 = convert(DataTable, [1:4 1:4]) dt5 = DataTable(Any[NullableArray([1,2,3,4]), nvstr]) - dth = hcat(dt3, dt4) + dth = merge(dt3, dt4) @test size(dth, 2) == 3 @test names(dth) == [:x1, :x1_1, :x2] @test isequal(dth[:x1], dt3[:x1]) - @test isequal(dth, [dt3 dt4]) - @test isequal(dth, DataTables.hcat!(DataTable(), dt3, dt4)) + @test isequal(dth, merge(dt3, dt4)) + @test isequal(dth, merge!(DataTable(), dt3, dt4)) - dth3 = hcat(dt3, dt4, dt5) + dth3 = merge(dt3, dt4, dt5) @test names(dth3) == [:x1, :x1_1, :x2, :x1_2, :x2_1] - @test isequal(dth3, hcat(dth, dt5)) - @test isequal(dth3, DataTables.hcat!(DataTable(), dt3, dt4, dt5)) + @test isequal(dth3, merge(dth, dt5)) + @test isequal(dth3, merge!(DataTable(), dt3, dt4, dt5)) - @test isequal(dt2, DataTables.hcat!(dt2)) + @test isequal(dt2, merge!(dt2)) - @testset "hcat ::AbstractDataTable" begin + @testset "merge ::AbstractDataTable" begin dt = DataTable(A = repeat('A':'C', inner=4), B = 1:12) gd = groupby(dt, :A) answer = DataTable(A = fill('A', 4), B = 1:4, A_1 = 'B', B_1 = 5:8, A_2 = 'C', B_2 = 9:12) - @test hcat(gd...) == answer + @test merge(gd...) == answer answer = answer[1:4] - @test hcat(gd[1], gd[2]) == answer + @test merge(gd[1], gd[2]) == answer end - @testset "hcat ::Vectors" begin + @testset "append ::Vectors" begin dt = DataTable() - DataTables.hcat!(dt, NullableCategoricalVector(1:10)) + append!(dt, NullableCategoricalVector(1:10)) @test isequal(dt[1], NullableCategoricalVector(1:10)) - DataTables.hcat!(dt, NullableArray(1:10)) + append!(dt, NullableArray(1:10)) @test isequal(dt[2], NullableArray(1:10)) + dt2 = append(dt, collect(1:10)) + @test isequal(dt2[3], collect(1:10)) + @test ncol(dt) == 2 + @test ncol(dt2) == 3 end # diff --git a/test/datatable.jl b/test/datatable.jl index 7c7175c..a7b7c2b 100644 --- a/test/datatable.jl +++ b/test/datatable.jl @@ -109,7 +109,7 @@ module TestDataTable dt = DataTable(a=[1, 2], b=[3., 4.]) dt2 = DataTable(b=["a", "b"], c=[:c, :d]) @test isequal(merge!(dt, dt2), dt) - @test isequal(dt, DataTable(a=[1, 2], b=["a", "b"], c=[:c, :d])) + @test isequal(dt, DataTable(a=[1, 2], b=[3., 4.], b_1=["a", "b"], c=[:c, :d])) #test_group("Empty DataTable constructors") dt = DataTable(Nullable{Int}, 10, 3) @@ -322,7 +322,7 @@ module TestDataTable dt = DataTable(A = 1:10, B = 'A':'J') @test !(dt[:,:] === dt) - @test append!(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) + @test vcat(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) dt = DataTable(A = NullableArray(1:3), B = NullableArray(4:6)) @test all(c -> isa(c, NullableArray), categorical!(deepcopy(dt)).columns) @test all(c -> isa(c, NullableCategoricalArray), categorical!(deepcopy(dt), [1,2]).columns)