From 0ac8e5c774a6076a70c23e50b800edaafc6f0154 Mon Sep 17 00:00:00 2001 From: Cameron Prybol Date: Sun, 16 Apr 2017 10:38:20 -0700 Subject: [PATCH 1/3] Add tests to trigger duplicate entries in unstack warning In v0.6, where the macros test_warn and test_nowarn are defined, tests will actually check the warning messages. In v0.5 warnings are not checked but can be manually confirmed by reviewing the output of running Pkg.test("DataTables"). --- test/datatable.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/datatable.jl b/test/datatable.jl index c75f5fe..8973da2 100644 --- a/test/datatable.jl +++ b/test/datatable.jl @@ -341,4 +341,24 @@ module TestDataTable @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), :A).columns) == [1] @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1]).columns) == [1] @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), 1).columns) == [1] + + @testset "duplicate entries in unstack warnings" begin + dt = DataTable(id=NullableArray([1, 2, 1, 2]), variable=["a", "b", "a", "b"], value=[3, 4, 5, 6]) + @static if VERSION >= v"0.6.0-dev.1980" + @test_warn "Duplicate entries in unstack." unstack(dt, :id, :variable, :value) + @test_warn "Duplicate entries in unstack at row 3." unstack(dt, :variable, :value) + end + a = unstack(dt, :id, :variable, :value) + b = unstack(dt, :variable, :value) + @test a == b == DataTable(id = Nullable[1, 2], a = [5, Nullable()], b = [Nullable(), 6]) + + dt = DataTable(id=NullableArray(1:2), variable=["a", "b"], value=3:4) + @static if VERSION >= v"0.6.0-dev.1980" + @test_nowarn unstack(dt, :id, :variable, :value) + @test_nowarn unstack(dt, :variable, :value) + end + a = unstack(dt, :id, :variable, :value) + b = unstack(dt, :variable, :value) + @test a == b == DataTable(id = Nullable[1, 2], a = [3, Nullable()], b = [Nullable(), 4]) + end end From 71edd51a86555ba471dff4268447f97ae1c2ffa3 Mon Sep 17 00:00:00 2001 From: Cameron Prybol Date: Sun, 16 Apr 2017 13:03:01 -0700 Subject: [PATCH 2/3] Update stack functions to use similar_nullable Stack was using NullableArray, which would incorrectly cast CategoricalArrays to NullableArrays{CategoricalValue}. Adds a new similar_nullable function for NullableCategoricalArrays. --- src/abstractdatatable/join.jl | 3 +++ src/abstractdatatable/reshape.jl | 20 ++++++-------------- test/datatable.jl | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl index 1ad170b..aa1302b 100644 --- a/src/abstractdatatable/join.jl +++ b/src/abstractdatatable/join.jl @@ -15,6 +15,9 @@ similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::@compat(Union{Int, Tuple{ similar_nullable(dt::AbstractDataTable, dims::Int) = DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt))) +similar_nullable{T,R}(dv::NullableCategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) = + NullableCategoricalArray{T}(dims) + # helper structure for DataTables joining immutable DataTableJoiner{DT1<:AbstractDataTable, DT2<:AbstractDataTable} dtl::DT1 diff --git a/src/abstractdatatable/reshape.jl b/src/abstractdatatable/reshape.jl index ed4d519..cc6b97f 100644 --- a/src/abstractdatatable/reshape.jl +++ b/src/abstractdatatable/reshape.jl @@ -198,12 +198,7 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int) keycol = NullableCategoricalArray(dt[colkey]) Nrow = length(refkeycol.pool) Ncol = length(keycol.pool) - T = eltype(valuecol) - if T <: Nullable - T = eltype(T) - end - payload = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol], - map(Symbol, levels(keycol))) + payload = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol))) nowarning = true for k in 1:nrow(dt) j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]]) @@ -216,7 +211,9 @@ function unstack(dt::AbstractDataTable, rowkey::Int, colkey::Int, value::Int) payload[j][i] = valuecol[k] end end - insert!(payload, 1, NullableArray(levels(refkeycol)), _names(dt)[rowkey]) + levs = levels(refkeycol) + col = similar_nullable(dt[rowkey], length(levs)) + insert!(payload, 1, copy!(col, levs), _names(dt)[rowkey]) end unstack(dt::AbstractDataTable, rowkey, colkey, value) = unstack(dt, index(dt)[rowkey], index(dt)[colkey], index(dt)[value]) @@ -235,15 +232,10 @@ function unstack(dt::AbstractDataTable, colkey::Int, value::Int) end keycol = NullableCategoricalArray(dt[colkey]) valuecol = dt[value] - dt1 = dt[g.idx[g.starts], g.cols] + dt1 = nullable!(dt[g.idx[g.starts], g.cols], g.cols) Nrow = length(g) Ncol = length(levels(keycol)) - T = eltype(valuecol) - if T <: Nullable - T = eltype(T) - end - dt2 = DataTable(Any[NullableArray(T, Nrow) for i in 1:Ncol], - map(@compat(Symbol), levels(keycol))) + dt2 = DataTable(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol))) nowarning = true for k in 1:nrow(dt) j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]]) diff --git a/test/datatable.jl b/test/datatable.jl index 8973da2..6aca8d1 100644 --- a/test/datatable.jl +++ b/test/datatable.jl @@ -342,6 +342,24 @@ module TestDataTable @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), [1]).columns) == [1] @test find(c -> isa(c, NullableCategoricalArray), categorical!(DataTable(A=1:3, B=4:6), 1).columns) == [1] + @testset "unstack nullable promotion" begin + dt = DataTable(Any[repeat(1:2, inner=4), repeat('a':'d', outer=2), collect(1:8)], + [:id, :variable, :value]) + udt = unstack(dt) + @test udt == unstack(dt, :variable, :value) == unstack(dt, :id, :variable, :value) + @test udt == DataTable(Any[Nullable[1, 2], Nullable[1, 5], Nullable[2, 6], + Nullable[3, 7], Nullable[4, 8]], [:id, :a, :b, :c, :d]) + @test all(typeof.(udt.columns) .== NullableVector{Int}) + dt = DataTable(Any[categorical(repeat(1:2, inner=4)), + categorical(repeat('a':'d', outer=2)), categorical(1:8)], + [:id, :variable, :value]) + udt = unstack(dt) + @test udt == unstack(dt, :variable, :value) == unstack(dt, :id, :variable, :value) + @test udt == DataTable(Any[Nullable[1, 2], Nullable[1, 5], Nullable[2, 6], + Nullable[3, 7], Nullable[4, 8]], [:id, :a, :b, :c, :d]) + @test all(typeof.(udt.columns) .== NullableCategoricalVector{Int, UInt32}) + end + @testset "duplicate entries in unstack warnings" begin dt = DataTable(id=NullableArray([1, 2, 1, 2]), variable=["a", "b", "a", "b"], value=[3, 4, 5, 6]) @static if VERSION >= v"0.6.0-dev.1980" From 7b59074bdbd6d79f3939294710c5dfea73552a7d Mon Sep 17 00:00:00 2001 From: Cameron Prybol Date: Sun, 16 Apr 2017 13:09:38 -0700 Subject: [PATCH 3/3] Update similar_nullable conventions (depwarn and compat) --- src/abstractdatatable/join.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/abstractdatatable/join.jl b/src/abstractdatatable/join.jl index aa1302b..0bc4656 100644 --- a/src/abstractdatatable/join.jl +++ b/src/abstractdatatable/join.jl @@ -3,14 +3,14 @@ ## # Like similar, but returns a nullable array -similar_nullable{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = - NullableArray(T, dims) +similar_nullable{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) = + NullableArray{T}(dims) -similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = - NullableArray(eltype(T), dims) +similar_nullable{T<:Nullable}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) = + NullableArray{eltype(T)}(dims) -similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = - NullableCategoricalArray(T, dims) +similar_nullable{T,R}(dv::CategoricalArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) = + NullableCategoricalArray{T}(dims) similar_nullable(dt::AbstractDataTable, dims::Int) = DataTable(Any[similar_nullable(x, dims) for x in columns(dt)], copy(index(dt)))