diff --git a/src/DataTables.jl b/src/DataTables.jl index 334c0c0..46039bb 100644 --- a/src/DataTables.jl +++ b/src/DataTables.jl @@ -39,6 +39,7 @@ export @~, SubDataTable, aggregate, + append, by, categorical!, colwise, @@ -52,6 +53,7 @@ export @~, eachrow, eltypes, groupby, + merge, melt, meltdt, names!, diff --git a/src/abstractdatatable/abstractdatatable.jl b/src/abstractdatatable/abstractdatatable.jl index e1e841e..1f3f8b2 100644 --- a/src/abstractdatatable/abstractdatatable.jl +++ b/src/abstractdatatable/abstractdatatable.jl @@ -13,7 +13,8 @@ The following are normally implemented for AbstractDataTables: * [`describe`](@ref) : summarize columns * [`dump`](@ref) : show structure -* `hcat` : horizontal concatenation +* `merge` : horizontal concatenation +* `merge!` : horizontal concatenation, modifies first argument in-place * `vcat` : vertical concatenation * `names` : columns names * [`names!`](@ref) : set columns names @@ -649,22 +650,29 @@ without(dt::AbstractDataTable, c::Any) = without(dt, index(dt)[c]) ############################################################################## ## -## Hcat / vcat +## merge/merge!/append/append!/vcat ## ############################################################################## -# hcat's first argument must be an AbstractDataTable -# Trailing arguments (currently) may also be NullableVectors, Vectors, or scalars. - -# hcat! is defined in datatables/datatables.jl -# Its first argument (currently) must be a DataTable. +function Base.merge!(dt::AbstractDataTable, others::AbstractDataTable...) + for other in others + for (i, c) in enumerate(add_names(names(dt), names(other))) + dt[c] = other[i] + end + end + return dt +end # catch-all to cover cases where indexing returns a DataTable and copy doesn't -Base.hcat(dt::AbstractDataTable, x) = hcat!(dt[:, :], x) -Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable) = hcat!(dt1[:, :], dt2) +Base.merge(dt::AbstractDataTable, dtn::AbstractDataTable...) = merge!(dt[:, :], dtn...) + +function Base.append!(dt1::AbstractDataTable, x::AbstractVector) + merge!(dt1, DataTable(Any[x])) +end -Base.hcat(dt::AbstractDataTable, x, y...) = hcat!(hcat(dt, x), y...) -Base.hcat(dt1::AbstractDataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...) +function append(dt1::AbstractDataTable, x::AbstractVector) + merge(dt1, DataTable(Any[x])) +end @generated function promote_col_type(cols::AbstractVector...) elty = Base.promote_eltype(cols...) diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl index cc6b97f..ca4fead 100644 --- a/src/abstractdatatable/reshape.jl +++ b/src/abstractdatatable/reshape.jl @@ -248,7 +248,7 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int) dt2[j][i] = valuecol[k] end end - hcat(dt1, dt2) + merge!(dt1, dt2) end unstack(dt::AbstractDataTable) = unstack(dt, :id, :variable, :value) diff --git a/src/datatable/datatable.jl b/src/datatable/datatable.jl index 382d3f0..02f43cf 100644 --- a/src/datatable/datatable.jl +++ b/src/datatable/datatable.jl @@ -63,8 +63,8 @@ dt1[:, [1,3]] dt1[1:4, :] dt1[1:4, :C] dt1[1:4, :C] = 40. * dt1[1:4, :C] -[dt1; dt2] # vcat -[dt1 dt2] # hcat +vcat(dt1, dt2) +merge(dt1, dt2) size(dt1) ``` @@ -635,15 +635,6 @@ function Base.insert!(dt::DataTable, col_ind::Int, item, name::Symbol) insert!(dt, col_ind, upgrade_scalar(dt, item), name) end -function Base.merge!(dt::DataTable, others::AbstractDataTable...) - for other in others - for n in _names(other) - dt[n] = other[n] - end - end - return dt -end - ############################################################################## ## ## Copying @@ -717,31 +708,6 @@ function deleterows!(dt::DataTable, ind::AbstractVector{Int}) dt end -############################################################################## -## -## Hcat specialization -## -############################################################################## - -# hcat! for 2 arguments -function hcat!(dt1::DataTable, dt2::AbstractDataTable) - u = add_names(index(dt1), index(dt2)) - for i in 1:length(u) - dt1[u[i]] = dt2[i] - end - return dt1 -end -hcat!(dt::DataTable, x::AbstractVector) = hcat!(dt, DataTable(Any[x])) - -# hcat! for 1-n arguments -hcat!(dt::DataTable) = dt -hcat!(a::DataTable, b, c...) = hcat!(hcat!(a, b), c...) - -# hcat -Base.hcat(dt::DataTable, x) = hcat!(copy(dt), x) -Base.hcat(dt1::DataTable, dt2::AbstractDataTable) = hcat!(copy(dt1), dt2) -Base.hcat(dt1::DataTable, dt2::AbstractDataTable, dtn::AbstractDataTable...) = hcat!(hcat(dt1, dt2), dtn...) - ############################################################################## ## ## Nullability @@ -787,17 +753,6 @@ function categorical!(dt::DataTable, compact::Bool=true) dt end -function Base.append!(dt1::DataTable, dt2::AbstractDataTable) - _names(dt1) == _names(dt2) || error("Column names do not match") - eltypes(dt1) == eltypes(dt2) || error("Column eltypes do not match") - ncols = size(dt1, 2) - # TODO: This needs to be a sort of transaction to be 100% safe - for j in 1:ncols - append!(dt1[j], dt2[j]) - end - return dt1 -end - function Base.convert(::Type{DataTable}, A::AbstractMatrix) n = size(A, 2) cols = Vector{Any}(n) diff --git a/src/groupeddatatable/grouping.jl b/src/groupeddatatable/grouping.jl index 2472976..faacc53 100644 --- a/src/groupeddatatable/grouping.jl +++ b/src/groupeddatatable/grouping.jl @@ -201,7 +201,11 @@ function combine(ga::GroupApplied) idx[j + (1:n)] = gd.idx[start] j += n end - hcat!(gd.parent[idx, gd.cols], valscat) + if isa(valscat, DataTable) + return merge!(gd.parent[idx, gd.cols], valscat) + else + return append!(gd.parent[idx, gd.cols], valscat) + end end diff --git a/src/other/index.jl b/src/other/index.jl index 45ebaee..5842f6a 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -123,9 +123,11 @@ Base.getindex(x::AbstractIndex, idx::AbstractVector{Symbol}) = [x.lookup[i] for # Helpers function add_names(ind::Index, add_ind::Index) - u = names(add_ind) + add_names(_names(ind), names(add_ind)) +end - seen = Set(_names(ind)) +function add_names(a::Vector{Symbol}, u::Vector{Symbol}) + seen = Set(a) dups = Int[] for i in 1:length(u) diff --git a/test/cat.jl b/test/cat.jl index 52c230a..da484d7 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -3,7 +3,7 @@ module TestCat using DataTables # - # hcat + # merge # nvint = NullableArray(Nullable{Int}[1, 2, Nullable(), 4]) @@ -14,35 +14,39 @@ module TestCat dt4 = convert(DataTable, [1:4 1:4]) dt5 = DataTable(Any[NullableArray([1,2,3,4]), nvstr]) - dth = hcat(dt3, dt4) + dth = merge(dt3, dt4) @test size(dth, 2) == 3 @test names(dth) == [:x1, :x1_1, :x2] @test isequal(dth[:x1], dt3[:x1]) - @test isequal(dth, [dt3 dt4]) - @test isequal(dth, DataTables.hcat!(DataTable(), dt3, dt4)) + @test isequal(dth, merge(dt3, dt4)) + @test isequal(dth, merge!(DataTable(), dt3, dt4)) - dth3 = hcat(dt3, dt4, dt5) + dth3 = merge(dt3, dt4, dt5) @test names(dth3) == [:x1, :x1_1, :x2, :x1_2, :x2_1] - @test isequal(dth3, hcat(dth, dt5)) - @test isequal(dth3, DataTables.hcat!(DataTable(), dt3, dt4, dt5)) + @test isequal(dth3, merge(dth, dt5)) + @test isequal(dth3, merge!(DataTable(), dt3, dt4, dt5)) - @test isequal(dt2, DataTables.hcat!(dt2)) + @test isequal(dt2, merge!(dt2)) - @testset "hcat ::AbstractDataTable" begin + @testset "merge ::AbstractDataTable" begin dt = DataTable(A = repeat('A':'C', inner=4), B = 1:12) gd = groupby(dt, :A) answer = DataTable(A = fill('A', 4), B = 1:4, A_1 = 'B', B_1 = 5:8, A_2 = 'C', B_2 = 9:12) - @test hcat(gd...) == answer + @test merge(gd...) == answer answer = answer[1:4] - @test hcat(gd[1], gd[2]) == answer + @test merge(gd[1], gd[2]) == answer end - @testset "hcat ::Vectors" begin + @testset "append ::Vectors" begin dt = DataTable() - DataTables.hcat!(dt, NullableCategoricalVector(1:10)) + append!(dt, NullableCategoricalVector(1:10)) @test isequal(dt[1], NullableCategoricalVector(1:10)) - DataTables.hcat!(dt, NullableArray(1:10)) + append!(dt, NullableArray(1:10)) @test isequal(dt[2], NullableArray(1:10)) + dt2 = append(dt, collect(1:10)) + @test isequal(dt2[3], collect(1:10)) + @test ncol(dt) == 2 + @test ncol(dt2) == 3 end # diff --git a/test/datatable.jl b/test/datatable.jl index 7c7175c..a7b7c2b 100644 --- a/test/datatable.jl +++ b/test/datatable.jl @@ -109,7 +109,7 @@ module TestDataTable dt = DataTable(a=[1, 2], b=[3., 4.]) dt2 = DataTable(b=["a", "b"], c=[:c, :d]) @test isequal(merge!(dt, dt2), dt) - @test isequal(dt, DataTable(a=[1, 2], b=["a", "b"], c=[:c, :d])) + @test isequal(dt, DataTable(a=[1, 2], b=[3., 4.], b_1=["a", "b"], c=[:c, :d])) #test_group("Empty DataTable constructors") dt = DataTable(Nullable{Int}, 10, 3) @@ -322,7 +322,7 @@ module TestDataTable dt = DataTable(A = 1:10, B = 'A':'J') @test !(dt[:,:] === dt) - @test append!(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) + @test vcat(DataTable(A = 1:2, B = 1:2), DataTable(A = 3:4, B = 3:4)) == DataTable(A=1:4, B = 1:4) dt = DataTable(A = NullableArray(1:3), B = NullableArray(4:6)) @test all(c -> isa(c, NullableArray), categorical!(deepcopy(dt)).columns) @test all(c -> isa(c, NullableCategoricalArray), categorical!(deepcopy(dt), [1,2]).columns)