Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix simdat_crossed for between-subject/between-item factors #66

Merged
merged 3 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions src/simdat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,38 @@ function simdat_crossed(rng::AbstractRNG, subj_n=1, item_n=1;
item_names = vcat(["item"], ib_vars)
item = (; (Symbol(k) => v for (k,v) in zip(item_names, item_vals))...)

# set up within both table
if (isnothing(both_win))
# cross the subject and item tables
design = factorproduct(subj, item)
# Check whether there are experimental factors which are both between-subject and between-item
if isnothing(subj_btwn) || isnothing(item_btwn)
both_between = []
else
both_between = intersect(keys(subj_btwn), keys(item_btwn))
end

# Case where there are no factors that are both within-subject and within-item
if isnothing(both_win)
# and there are no factors that are both between subject and between item
if isempty(both_between)
# cross the subject and item tables
design = factorproduct(subj, item)
else
# make sure that each subject/item is only in one level of the between subject/between item factor
design = [merge(x, y) for x in rowtable(subj), y in rowtable(item) if all(x[var] == y[var] for var in both_between)]
end
else
# set up within both table
wc = values(both_win) |> collect
win_prod = Iterators.product(wc...)
win_vals = columntable(win_prod) |> collect
win_names = collect(keys(both_win))
win = (; (Symbol(k) => v for (k,v) in zip(win_names, win_vals))...)

# cross the subject and item tables with any within factors
design = factorproduct(subj, item, win)
if isempty(both_between)
# cross the subject and item tables with any within factors
design = factorproduct(subj, item, win)
else
# make sure that each subject/item is only in one level of the between subject/between item factor
design = [merge(x, y, z) for x in rowtable(subj), y in rowtable(item), z in rowtable(win) if all(x[var] == y[var] for var in both_between)]
end
end

dv = randn(rng, length(design))
Expand Down
148 changes: 148 additions & 0 deletions test/simdat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,151 @@ using Test
@test Tables.isrowtable(dat)
@test Set(Tables.columnnames(first(dat))) == Set([:subj, :item, :dv])
end

@testset "simdat_crossed between-subjects between-items - simple case" begin
    # Simulate data for a design in which one factor (`cond`) is both
    # between-subject and between-item.
    conditions = Dict(:cond => ["A", "B"])
    cond_n = length(conditions[:cond])
    subj_n = 2
    item_n = 4

    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    subj_btwn = conditions,
                                    item_btwn = conditions))

    # A subject is only paired with the items that share its condition.
    # The expected count assumes the items are spread evenly over the
    # conditions; integer division keeps it an integer like `nrow(data)`.
    @test nrow(data) == subj_n * (item_n ÷ cond_n)

    # Each subject must appear in exactly one condition.
    for s in nlevels(subj_n, "S")
        @test length(unique(data[isequal.(data.subj, s), :cond])) == 1
    end

    # Each item must appear in exactly one condition.
    for i in nlevels(item_n, "I")
        @test length(unique(data[isequal.(data.item, i), :cond])) == 1
    end
end;

@testset "simdat_crossed between-subjects between-items - complex case" begin
    # Simulate data where two factors (`cond1`, `cond2`) are listed as both
    # between-subject and between-item, next to one factor that is only
    # between-subject (`age`), one that is only between-item (`pet`), and one
    # factor passed as `both_win` (`time`).
    shared = Dict(:cond1 => ["A", "B"],
                  :cond2 => ["C", "D", "E"])
    subj_btwn = merge(Dict(:age => ["O", "Y"]), shared)
    item_btwn = merge(Dict(:pet => ["cat", "dog"]), shared)
    both_win = Dict(:time => ["morning", "evening"])

    subj_n = 12
    item_n = 12

    data = DataFrame(simdat_crossed(subj_n, item_n;
                                    subj_btwn, item_btwn, both_win))

    # Every subject must sit in a single level of each shared factor.
    for s in nlevels(subj_n, "S")
        subj_rows = isequal.(data.subj, s)
        for cond in [:cond1, :cond2]
            @test length(unique(data[subj_rows, cond])) == 1
        end
    end

    # Every item must sit in a single level of each shared factor.
    for i in nlevels(item_n, "I")
        item_rows = isequal.(data.item, i)
        for cond in [:cond1, :cond2]
            @test length(unique(data[item_rows, cond])) == 1
        end
    end
end;

@testset "simdat_crossed test all combinations" begin
    # Walk through the combinations of the `subj_btwn`, `item_btwn` and
    # `both_win` keywords and check the shape of the resulting design.
    both_btwn = Dict(:cond1 => ["A", "B"],
                     :cond2 => ["C", "D", "E"])
    subj_btwn = merge(Dict(:age => ["O", "Y"]), both_btwn)
    item_btwn = merge(Dict(:pet => ["cat", "dog"]), both_btwn)
    both_win = Dict(:time => ["morning", "evening"])

    subj_n = 12
    item_n = 12

    #-----
    # subject between, item between and within factors together
    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    subj_btwn=subj_btwn,
                                    item_btwn=item_btwn,
                                    both_win=both_win))
    s2 = subset(data, :subj => ByRow(==("S02")))
    @test all(==("A"), s2.cond1)
    @test all(==("Y"), s2.age)
    @test length(unique(s2.pet)) == 2 # from item between

    #-----
    # no subject between
    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    item_btwn=item_btwn,
                                    both_win=both_win))

    @test nrow(data) == 288 # many more rows because many more effects are within-subject
    s2 = subset(data, :subj => ByRow(==("S02")))
    @test length(unique(string.(s2.cond1) .* string.(s2.cond2))) == 6 # test all 6 combinations are there
    @test length(unique(s2.cond1)) == 2 # now we have both conditions within subject

    i2 = subset(data, :item => ByRow(==("I02")))
    @test all(==("A"), i2.cond1) # but only one because cond1 is between-items here

    #-----
    # no item between
    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    subj_btwn=subj_btwn,
                                    both_win=both_win))
    @test nrow(data) == 288 # many more rows because many more effects are within-item
    i2 = subset(data, :item => ByRow(==("I02")))
    @test length(unique(string.(i2.cond1) .* string.(i2.cond2))) == 6 # test all 6 combinations are there
    @test length(unique(i2.cond1)) == 2 # now we have both conditions within item

    s2 = subset(data, :subj => ByRow(==("S02")))
    @test all(==("A"), s2.cond1) # but only one because cond1 is between-subject here

    #-----
    # within factor only: subjects and items are fully crossed
    data = DataFrame(simdat_crossed(subj_n, item_n; both_win))
    @test nrow(data) == subj_n * item_n * 2
    i2 = subset(data, :item => ByRow(==("I02")))
    s2 = subset(data, :subj => ByRow(==("S02")))
    @test length(unique(i2.subj)) == 12
    @test length(unique(s2.item)) == 12

    #-----
    # no factors at all
    data = DataFrame(simdat_crossed(subj_n, item_n))
    @test nrow(data) == subj_n * item_n

    #-----
    # subject between and item between, no within factor
    data = DataFrame(simdat_crossed(subj_n, item_n; subj_btwn, item_btwn))
    @test nrow(data) == 24
    i2 = subset(data, :item => ByRow(==("I02")))
    @test nrow(i2) == 2
    @test length(unique(i2.subj)) == 2 # because age is within item, but between subjects, we have two subjects here
    @test length(unique(i2.age)) == 2 # because age is within item

    #-----
    # subject between only
    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    subj_btwn=subj_btwn))
    @test nrow(data) == 12 * 12
    i2 = subset(data, :item => ByRow(==("I02")))
    @test length(unique(i2.subj)) == 12
    @test length(unique(i2.cond1 .* i2.cond2 .* i2.age)) == 12 # everything is within item
    s2 = subset(data, :subj => ByRow(==("S02")))
    # check the subject subset itself: every subject is crossed with all items
    @test length(unique(s2.item)) == 12
    @test length(unique(s2.cond1 .* s2.cond2 .* s2.age)) == 1 # everything is between subject

    #--- same as previous but for item
    data = DataFrame(simdat_crossed(subj_n, item_n,
                                    item_btwn=item_btwn))
    @test nrow(data) == 12 * 12
    i2 = subset(data, :item => ByRow(==("I02")))
    @test length(unique(i2.subj)) == 12
    @test length(unique(i2.cond1 .* i2.cond2 .* i2.pet)) == 1 # everything is between item
    s2 = subset(data, :subj => ByRow(==("S02")))
    # check the subject subset itself: every subject is crossed with all items
    @test length(unique(s2.item)) == 12
    @test length(unique(s2.cond1 .* s2.cond2 .* s2.pet)) == 12 # everything is within subject
end
Loading