Skip to content

Commit

Permalink
Merge branch 'release/2.6.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
psychelzh committed Nov 28, 2023
2 parents ae6bf0c + bed6516 commit 761df88
Show file tree
Hide file tree
Showing 27 changed files with 748 additions and 258 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: preproc.iquizoo
Title: Utility Functions for Data Processing of Iquizoo Games
Version: 2.5.2
Version: 2.6.0
Authors@R:
person("Liang", "Zhang", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-9041-1150"))
Expand All @@ -15,6 +15,7 @@ Depends:
R (>= 4.1.0)
Imports:
dplyr,
jsonlite,
pracma,
purrr,
rlang (>= 0.1.2),
Expand All @@ -29,8 +30,10 @@ Suggests:
readr,
roxygen2,
testthat (>= 3.0.0),
tidytable,
withr
Config/testthat/edition: 3
Config/testthat/parallel: true
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export(multisense)
export(nback)
export(nle)
export(nsymncmp)
export(preproc_data)
export(racer)
export(rapm)
export(refframe)
Expand All @@ -39,6 +40,7 @@ export(sumweighted)
export(switchcost)
export(symncmp)
export(synwin)
export(wrangle_data)
import(dplyr)
import(rlang)
import(tidyr)
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# preproc.iquizoo 2.6.0

## Breaking Changes

* Added `wrangle_data()` and `preproc_data()` functions, which were previously in the tarflow.iquizoo package.

## Bug Fixes

* Fixed an edge case where `fit_numerosity()` could get stuck in an infinite loop.

# preproc.iquizoo 2.5.2

* Enhance code quality of internal functions.
Expand Down
67 changes: 66 additions & 1 deletion R/nsymncmp.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,72 @@ calc_numerosity <- function(data, name_bigset, name_smallset, name_acc) {
"Cannot find fit after the max number of fitting.",
"fit_not_converge"
)
pars <- replace(pars, TRUE, NA_real_)
}
tibble::as_tibble_row(pars)
}

#' Fit a Simple Numerosity Model
#'
#' This model assumes the distribution of mental representation for a given
#' number/count k is N(k, (w * k) ^ 2).
#'
#' The optimization is repeated `n_fit` times, each time starting from a
#' randomly drawn initial value, and the fit with the smallest objective value
#' is kept.
#'
#' @template common
#' @param name_bigset,name_smallset Variable name in `data` indicates bigger and
#'   smaller set.
#' @param name_acc Variable name in `data` indicates user's response is correct
#'   or not.
#' @param n_fit Number of fits to try to find the best estimate.
#' @param seed Random seed. Default is 1 so that results can be reproduced.
#' @return A [list()] with structure the same as [optim()]. If no usable
#'   initial value can be found, a warning of class `"no_good_init"` is
#'   signaled and a list with `par = c(w = NA)` and `convergence = 1` is
#'   returned instead.
#' @export
fit_numerosity <- function(data, name_bigset, name_smallset, name_acc,
                           n_fit = 5, seed = 1) {
  set.seed(seed)
  b <- data[[name_bigset]]
  s <- data[[name_smallset]]
  acc <- data[[name_acc]]

  min_objective <- Inf
  best_fit <- NULL
  for (j in seq_len(n_fit)) {
    # try 10 times to find a good initial value; `ll_numerosity()` returns the
    # penalty value 1e6 when the likelihood is not finite
    for (i in seq_len(10)) {
      init <- c(w = stats::runif(1))
      init_objective <- ll_numerosity(init, b, s, acc)
      if (init_objective < 1e6) {
        break
      }
    }
    if (init_objective >= 1e6) {
      warn(
        "Cannot find a good initial value after 10 tries.",
        "no_good_init"
      )
      return(list(par = c(w = NA_real_), convergence = 1))
    }
    fit <- stats::optim(
      init, ll_numerosity,
      method = "L-BFGS-B",
      b = b, s = s, acc = acc,
      lower = 0
    )
    if (fit[["value"]] < min_objective) {
      # remember the best objective so far, otherwise every later fit would
      # replace the current best regardless of its quality
      min_objective <- fit[["value"]]
      best_fit <- fit
    }
  }
  best_fit
}

# Negative log-likelihood of the numerosity model.
#
# `pars` is a named numeric vector holding the element `"w"`; `b` and `s` are
# the sizes of the bigger and the smaller set, and `acc` tells whether each
# response is correct. With each count k represented as N(k, (w * k)^2), the
# difference between the two representations has mean `b - s` and variance
# `w^2 * (b^2 + s^2)` (assuming the two representations are independent).
#
# Returns the negative sum of log-probabilities, or the penalty value 1e6 if
# any log-probability is not finite.
ll_numerosity <- function(pars, b, s, acc) {
  means <- b - s
  # `pnorm()` takes a standard deviation, so use the square root of the
  # variance of the difference
  sds <- pars["w"] * sqrt(b^2 + s^2)

  # incorrect means the mental representation is less than 0, so lower tail
  log_probs <- ifelse(
    !acc,
    stats::pnorm(0, means, sds, lower.tail = TRUE, log.p = TRUE),
    stats::pnorm(0, means, sds, lower.tail = FALSE, log.p = TRUE)
  )

  if (any(!is.finite(log_probs))) {
    return(1e6)
  }
  -sum(log_probs)
}
122 changes: 122 additions & 0 deletions R/preproc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#' Wrangle Raw Data
#'
#' Parse each raw json string into a [data.frame()], keep the parsed results
#' in a list column and drop the column holding the original strings.
#'
#' @param data The raw data.
#' @param name_raw_json The name of the column in `data` storing user's raw
#'   data as json strings.
#' @param name_raw_parsed The name of the list column used to store the parsed
#'   data.
#' @return A [data.frame] containing the parsed data.
#' @export
wrangle_data <- function(data,
                         name_raw_json = "game_data",
                         name_raw_parsed = "raw_parsed") {
  parsed <- purrr::map(data[[name_raw_json]], parse_raw_json)
  data[[name_raw_parsed]] <- parsed
  select(data, !all_of(name_raw_json))
}

#' Feed Raw Data to Pre-processing
#'
#' Calculate indices from data typically returned by [wrangle_data()].
#'
#' @details
#'
#' Observations whose raw data are empty (an empty vector, e.g. `NULL`, in the
#' `name_raw_parsed` column) are dropped before any index is calculated. When
#' nothing is left after dropping, a warning is signaled and `NULL` is
#' returned.
#'
#' @param data A [data.frame] containing raw data.
#' @param fn A function or a formula. See [rlang::as_function()] for more
#'   details.
#' @param ... Additional arguments passed to `fn`.
#' @param name_raw_parsed The name of the column storing user's raw data as a
#'   list of [data.frame]s.
#' @param pivot_results Whether to pivot the calculated indices. If `TRUE`,
#'   the indices are pivoted into long format: index names go to the column
#'   named by `pivot_names_to` and index values go to the column named by
#'   `pivot_values_to`. If `FALSE`, the indices keep the format returned by
#'   `fn`.
#' @param pivot_names_to,pivot_values_to The column names used to store index
#'   names and values if `pivot_results` is `TRUE`. See
#'   [tidyr::pivot_longer()] for more details.
#' @return A [data.frame] containing the calculated indices.
#' @export
preproc_data <- function(data, fn, ...,
                         name_raw_parsed = "raw_parsed",
                         pivot_results = TRUE,
                         pivot_names_to = "index_name",
                         pivot_values_to = "score") {
  # keep only the observations that actually carry raw data
  data <- filter(data, !purrr::map_lgl(.data[[name_raw_parsed]], is_empty))
  if (nrow(data) == 0) {
    warn("No non-empty data found.")
    return(NULL)
  }
  fn <- as_function(fn)
  # the calculated indices replace the raw data column
  results <- mutate(
    data,
    calc_indices(.data[[name_raw_parsed]], fn, ...),
    .keep = "unused"
  )
  if (!pivot_results) {
    return(results)
  }
  # every column that was not in the input is treated as an index
  results_long <- pivot_longer(
    results,
    cols = !any_of(names(data)),
    names_to = pivot_names_to,
    values_to = pivot_values_to
  )
  vctrs::vec_restore(results_long, data)
}

# helper functions
# Parse one json string, lower-casing column names and character values.
#
# A string that cannot be parsed triggers a warning and is treated as `NULL`;
# empty parsed results are returned as `NULL` as well.
parse_raw_json <- function(jstr) {
  on_parse_error <- function(cnd) {
    warn(
      c(
        "Failed to parse json string with the following error:",
        conditionMessage(cnd),
        i = "Will parse it as `NULL` instead."
      )
    )
    NULL
  }
  parsed <- tryCatch(jsonlite::fromJSON(jstr), error = on_parse_error)
  if (is_empty(parsed)) {
    return(NULL)
  }
  lowered <- rename_with(parsed, tolower)
  mutate(lowered, across(where(is.character), tolower))
}

# Bind a list of raw data into one data frame, apply `fn` to it grouped by a
# temporary element id and drop that id from the result.
calc_indices <- function(l, fn, ...) {
  # used as a temporary id for each element
  name_id <- ".id"
  # fallback used when the first binding attempt throws an error
  # NOTE(review): the message names tidyr although the failing call is
  # `bind_rows()` - confirm which package is meant
  bind_with_tidytable <- function(cnd) {
    warn(
      c(
        "Failed to bind raw data with the following error:",
        conditionMessage(cnd),
        i = "Will try using tidytable package."
      )
    )
    check_installed(
      "tidytable",
      "because tidyr package fails to bind raw data."
    )
    bound <- tidytable::bind_rows(l, .id = name_id)
    utils::type.convert(bound, as.is = TRUE)
  }
  raw_bound <- tryCatch(
    bind_rows(l, .id = name_id),
    error = bind_with_tidytable
  )
  indices <- fn(raw_bound, .by = name_id, ...)
  select(indices, !all_of(name_id))
}
102 changes: 102 additions & 0 deletions R/switch-congruence.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,105 @@ switchcost <- function(data, .by = NULL, .input = NULL, .extra = NULL) {
merge(spd_acc, switch_cost, by = .by) |>
vctrs::vec_restore(data)
}

#' Switch cost
#'
#' Utility function to calculate switch cost as the difference between
#' `"switch"` and `"repeat"` trials.
#'
#' @template common
#' @param by The column name(s) in `data` used to be grouped by. If set to
#'   `NULL`, all data will be treated as from one subject.
#' @templateVar name_acc TRUE
#' @templateVar name_rt TRUE
#' @template names
#' @param name_switch The column name of the `data` input whose values are
#'   the switch type, which is a `character` vector with at least `"switch"`
#'   and `"repeat"` values. It will be coerced as a `factor` vector with these
#'   two levels.
#' @return A data frame containing switch cost results on accuracy and
#'   response time.
#' @keywords internal
calc_switch_cost <- function(data, by, name_switch, name_rt, name_acc) {
  switch_levels <- c("switch", "repeat")
  data[[name_switch]] <- factor(data[[name_switch]], levels = switch_levels)
  calc_cond_diff(
    data = data,
    by = by,
    name_acc = name_acc,
    name_rt = name_rt,
    name_cond = name_switch,
    name_diff_prefix = "switch_cost_"
  )
}

#' Congruence effect
#'
#' Utility function to calculate congruence effect as the difference between
#' incongruent and congruent trials.
#'
#' @template common
#' @param by The column name(s) in `data` used to be grouped by. If set to
#'   `NULL`, all data will be treated as from one subject.
#' @templateVar name_acc TRUE
#' @templateVar name_rt TRUE
#' @template names
#' @param name_cong The column name of the `data` input whose values are the
#'   congruence information, which is a `character` vector with "incongruent
#'   condition" (label: `"inc"`) and "congruent condition" (label: `"con"`).
#'   It will be coerced as a `factor` vector with these two levels.
#' @return A data frame containing congruence effect results on accuracy and
#'   response time.
#' @keywords internal
calc_cong_eff <- function(data, by, name_cong, name_acc, name_rt) {
  cong_levels <- c("inc", "con")
  data[[name_cong]] <- factor(data[[name_cong]], levels = cong_levels)
  calc_cond_diff(
    data = data,
    by = by,
    name_acc = name_acc,
    name_rt = name_rt,
    name_cond = name_cong,
    name_diff_prefix = "cong_eff_"
  )
}

# Calculate speed/accuracy indices for each level of `name_cond` and the
# difference between the first and the second level of that factor.
#
# The result joins the difference scores (columns prefixed with
# `name_diff_prefix`) with the per-condition indices in wide format.
calc_cond_diff <- function(data, by, name_acc, name_rt,
                           name_cond, name_diff_prefix) {
  conds <- levels(data[[name_cond]])

  # indices for each condition, with every factor level present
  index_each_cond <- calc_spd_acc(
    data,
    by = c(by, name_cond),
    name_acc = name_acc,
    name_rt = name_rt
  )
  index_each_cond <- complete(index_each_cond, .data[[name_cond]])
  index_each_cond <- select(
    index_each_cond,
    all_of(c(by, name_cond, "pc", "mrt", "ies", "rcs", "lisas"))
  )

  # one row per index and one column per condition
  index_long <- pivot_longer(
    index_each_cond,
    cols = -any_of(c(by, name_cond)),
    names_to = "index_name",
    values_to = "score"
  )
  index_by_cond <- pivot_wider(
    index_long,
    names_from = all_of(name_cond),
    values_from = "score"
  )

  cond_diff <- mutate(
    index_by_cond,
    diff = .data[[conds[[1]]]] - .data[[conds[[2]]]],
    .keep = "unused"
  )
  # make sure larger values correspond to larger cost/effect
  cond_diff <- mutate(
    cond_diff,
    diff = if_else(
      .data$index_name %in% c("pc", "rcs"),
      -diff, diff
    )
  )
  diff_wide <- pivot_wider(
    cond_diff,
    names_from = "index_name",
    values_from = "diff",
    names_prefix = name_diff_prefix
  )

  index_wide <- pivot_wider(
    index_each_cond,
    names_from = all_of(name_cond),
    values_from = -any_of(c(by, name_cond))
  )
  merge(diff_wide, index_wide, by = by)
}
Loading

0 comments on commit 761df88

Please sign in to comment.