From 6264c6b7c215438a700ab564afbf5d3916d9d923 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Thu, 5 Oct 2023 10:25:50 +0800
Subject: [PATCH 01/12] Increment version number to 2.5.2.9000

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index e1c9629..204f7f9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: preproc.iquizoo
 Title: Utility Functions for Data Processing of Iquizoo Games
-Version: 2.5.2
+Version: 2.5.2.9000
 Authors@R:
     person("Liang", "Zhang", , "psychelzh@outlook.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-9041-1150"))
diff --git a/NEWS.md b/NEWS.md
index 6c56703..584b7da 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+# preproc.iquizoo (development version)
+
 # preproc.iquizoo 2.5.2
 
 * Enhance code quality of internal functions.

From d7a67d0c5585c8df90cb295ae3d5438dc0e29905 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Thu, 5 Oct 2023 20:45:52 +0800
Subject: [PATCH 02/12] Enhance code quality

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 R/utils-common.R                              | 28 +++++++++----------
 .../testthat/test-calc_staircase_wetherill.R  |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/R/utils-common.R b/R/utils-common.R
index 4ab0ad5..4bc567a 100644
--- a/R/utils-common.R
+++ b/R/utils-common.R
@@ -62,26 +62,26 @@ check_outliers_rt <- function(x, threshold = 2.5) {
 #' @return The mean threshold.
 #' @keywords internal
 calc_staircase_wetherill <- function(x) {
-  find_reversals <- function(x, type = c("peaks", "valleys")) {
-    type <- match.arg(type)
-    if (type == "valleys") x <- -x
-    mat <- pracma::findpeaks(x)
-    if (is.null(mat)) {
-      warn(paste("No", type, "found from input"), "input_not_suitable")
-      return()
-    }
-    if (type == "valleys") {
-      -mat[, 1]
-    } else {
+  find_reversals <- function(x) {
+    find_peaks_val <- function(x) {
+      mat <- pracma::findpeaks(x)
+      if (is.null(mat)) {
+        warn("Reversals not found from input", "no_reversals_found")
+        return(NA_real_)
+      }
       mat[, 1]
     }
+    list(
+      peaks = find_peaks_val(x),
+      valleys = -find_peaks_val(-x)
+    )
   }
-  # use run length encoding to remove repetitions in transformed method
+  # remove repetitions in transformed method
   x <- rle(x)$values
-  reversals <- c("peaks", "valleys") |>
-    purrr::map(\(type) find_reversals(x, type))
+  reversals <- find_reversals(x)
   reversals |>
     purrr::map(
+      # keep equal number of peaks and valleys
       \(x) utils::tail(x, min(lengths(reversals)))
     ) |>
     purrr::list_c() |>
diff --git a/tests/testthat/test-calc_staircase_wetherill.R b/tests/testthat/test-calc_staircase_wetherill.R
index 71fd870..c8f1c38 100644
--- a/tests/testthat/test-calc_staircase_wetherill.R
+++ b/tests/testthat/test-calc_staircase_wetherill.R
@@ -8,5 +8,5 @@ test_that("Correctly find all peaks and valleys", {
 test_that("Return NA if no peaks or valleys", {
   is.na(calc_staircase_wetherill(c(2:5))) |>
     expect_true() |>
-    expect_warning(class = "input_not_suitable")
+    expect_warning(class = "no_reversals_found")
 })

From 9c92a6e564bfe5c85a6927732b8e5e48b4bc495e Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Thu, 5 Oct 2023 20:52:41 +0800
Subject: [PATCH 03/12] Update utils structures

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 R/nsymncmp.R                          |  57 ++++++++++++++
 R/switch-congruence.R                 | 102 ++++++++++++++++++++++++++
 R/utils-common.R                      |  89 ----------------------
 R/utils-switch-congruence.R           | 101 -------------------------
 R/{utils-speed-accuracy.R => utils.R} |  90 +++++++++++++++++++++++
 R/utils_numerosity.R                  |  56 --------------
 man/calc_cong_eff.Rd                  |   2 +-
 man/calc_sdt.Rd                       |   2 +-
 man/calc_spd_acc.Rd                   |   2 +-
 man/calc_staircase_wetherill.Rd       |   2 +-
 man/calc_switch_cost.Rd               |   2 +-
 man/check_outliers_rt.Rd              |   2 +-
 man/fit_numerosity.Rd                 |   2 +-
 man/parse_char_resp.Rd                |   2 +-
 man/update_settings.Rd                |   2 +-
 15 files changed, 258 insertions(+), 255 deletions(-)
 delete mode 100644 R/utils-common.R
 delete mode 100644 R/utils-switch-congruence.R
 rename R/{utils-speed-accuracy.R => utils.R} (63%)
 delete mode 100644 R/utils_numerosity.R

diff --git a/R/nsymncmp.R b/R/nsymncmp.R
index af49fb2..59878cf 100644
--- a/R/nsymncmp.R
+++ b/R/nsymncmp.R
@@ -57,3 +57,60 @@ calc_numerosity <- function(data, name_bigset, name_smallset, name_acc) {
   }
   tibble::as_tibble_row(pars)
 }
+
+#' Fit a Simple Numerosity Model
+#'
+#' This model assumes the distribution of mental representation for a given
+#' number/count k is N(k, (w * k) ^ 2).
+#'
+#' @template common
+#' @param name_bigset,name_smallset Variable name in `data` indicates bigger and
+#'   smaller set.
+#' @param name_acc Variable name in `data` indicates user's response is correct
+#'   or not.
+#' @param n_fit Number of fits to try to find the best estimate.
+#' @param seed Random seed. Default is 1 so that results can be reproduced.
+#' @return A [list()] with structure the same as [optim()].
+#' @export
+fit_numerosity <- function(data, name_bigset, name_smallset, name_acc,
+                           n_fit = 5, seed = 1) {
+  set.seed(seed)
+  b <- data[[name_bigset]]
+  s <- data[[name_smallset]]
+  acc <- data[[name_acc]]
+
+  min_objective <- Inf
+  best_fit <- NULL
+  for (j in seq_len(n_fit)) {
+    repeat {
+      init <- c(w = stats::runif(1))
+      if (ll_numerosity(init, b, s, acc) < 1e6) {
+        break
+      }
+    }
+    fit <- stats::optim(
+      init, ll_numerosity,
+      method = "L-BFGS-B",
+      b = b, s = s, acc = acc,
+      lower = 0
+    )
+    if (fit[["value"]] < min_objective) {
+      best_fit <- fit
+    }
+  }
+  best_fit
+}
+
+ll_numerosity <- function(pars, b, s, acc) {
+  means <- b - s
+  sds <- pars["w"]^2 * (b^2 + s^2)
+
+  # incorrect means the mental representation is less than 0, so lower tail
+  dens <- ifelse(
+    !acc,
+    stats::pnorm(0, means, sds, lower.tail = TRUE, log.p = TRUE),
+    stats::pnorm(0, means, sds, lower.tail = FALSE, log.p = TRUE)
+  )
+
+  return(ifelse(any(!is.finite(dens)), 1e6, -sum(dens)))
+}
diff --git a/R/switch-congruence.R b/R/switch-congruence.R
index cbd4418..35fc13a 100644
--- a/R/switch-congruence.R
+++ b/R/switch-congruence.R
@@ -186,3 +186,105 @@ switchcost <- function(data, .by = NULL, .input = NULL, .extra = NULL) {
   merge(spd_acc, switch_cost, by = .by) |>
     vctrs::vec_restore(data)
 }
+
+#' Switch cost
+#'
+#' Utility function to calculate general and specific switch cost.
+#'
+#' @template common
+#' @param by The column name(s) in `data` used to be grouped by. If set to
+#'   `NULL`, all data will be treated as from one subject.
+#' @templateVar name_acc TRUE
+#' @templateVar name_rt TRUE
+#' @template names
+#' @param name_switch The column name of the `data` input whose values are
+#'   the switch type, in which is a `character` vector with at least `"switch"`
+#'   and `"repeat"` values.
+#' @keywords internal
+calc_switch_cost <- function(data, by, name_switch, name_rt, name_acc) {
+  data[[name_switch]] <- factor(data[[name_switch]], c("switch", "repeat"))
+  calc_cond_diff(
+    data,
+    by,
+    name_cond = name_switch,
+    name_diff_prefix = "switch_cost_",
+    name_acc = name_acc,
+    name_rt = name_rt
+  )
+}
+
+#' Congruence effect
+#'
+#' Utility function to calculate congruence effect sizes.
+#'
+#' @template common
+#' @param by The column name(s) in `data` used to be grouped by. If set to
+#'   `NULL`, all data will be treated as from one subject.
+#' @templateVar name_acc TRUE
+#' @templateVar name_rt TRUE
+#' @template names
+#' @param name_cong The column name of the `data` input whose values are the
+#'   congruence information, in which is a `character` vector with "incongruent
+#'   condition" (label: `"inc"`) and "congruent condition" (label: `"con"`). It
+#'   will be coerced as a `factor` vector with these two levels.
+#' @return A [tibble][tibble::tibble-package] contains congruence effect results
+#'   on accuracy and response time.
+#' @keywords internal
+calc_cong_eff <- function(data, by, name_cong, name_acc, name_rt) {
+  data[[name_cong]] <- factor(data[[name_cong]], c("inc", "con"))
+  calc_cond_diff(
+    data,
+    by,
+    name_cond = name_cong,
+    name_diff_prefix = "cong_eff_",
+    name_acc = name_acc,
+    name_rt = name_rt
+  )
+}
+
+calc_cond_diff <- function(data, by, name_acc, name_rt,
+                           name_cond, name_diff_prefix) {
+  conds <- levels(data[[name_cond]])
+  index_each_cond <- data |>
+    calc_spd_acc(
+      by = c(by, name_cond),
+      name_acc = name_acc,
+      name_rt = name_rt
+    ) |>
+    complete(.data[[name_cond]]) |>
+    select(all_of(c(by, name_cond, "pc", "mrt", "ies", "rcs", "lisas")))
+  index_each_cond |>
+    pivot_longer(
+      cols = -any_of(c(by, name_cond)),
+      names_to = "index_name",
+      values_to = "score"
+    ) |>
+    pivot_wider(
+      names_from = all_of(name_cond),
+      values_from = "score"
+    ) |>
+    mutate(
+      diff = .data[[conds[[1]]]] - .data[[conds[[2]]]],
+      .keep = "unused"
+    ) |>
+    # make sure larger values correspond to larger switch cost
+    mutate(
+      diff = if_else(
+        .data$index_name %in% c("pc", "rcs"),
+        -diff, diff
+      )
+    ) |>
+    pivot_wider(
+      names_from = "index_name",
+      values_from = "diff",
+      names_prefix = name_diff_prefix
+    ) |>
+    merge(
+      index_each_cond |>
+        pivot_wider(
+          names_from = all_of(name_cond),
+          values_from = -any_of(c(by, name_cond))
+        ),
+      by = by
+    )
+}
diff --git a/R/utils-common.R b/R/utils-common.R
deleted file mode 100644
index 4bc567a..0000000
--- a/R/utils-common.R
+++ /dev/null
@@ -1,89 +0,0 @@
-#' Convert character responses
-#'
-#' Simple function converts character correctness to numeric one.
-#'
-#' @param x The character vector to be parsed.
-#' @param delim Delimiter used to join correctness when forming the character.
-#'   Usually is hyphen (i.e., `"-"`), which is the default.
-#' @param convert_numeric A logical value indicating if the values should be
-#'   converted to `numeric` ones.
-#' @return A list of the parsed result, the same length as the input vector.
-#' @keywords internal
-parse_char_resp <- function(x, delim = "-", convert_numeric = TRUE) {
-  parsed <- stringr::str_split(x, delim)
-  if (convert_numeric) {
-    parsed <- purrr::map(parsed, as.numeric)
-  }
-  parsed
-}
-
-
-#' Update settings with option settings
-#'
-#' Options are set in list can be tricky to update. This function makes partly
-#' adding custom options work.
-#'
-#' @param origin The original settings.
-#' @param updates The updates to settings
-#' @return An update list of settings.
-#' @keywords internal
-update_settings <- function(origin, updates) {
-  if (is.null(updates)) {
-    return(origin)
-  }
-  purrr::imap(origin, ~ updates[[.y]] %||% .x)
-}
-
-#' Outliers Detection for response time data
-#'
-#' This method is also called "transform" method, because it does a
-#' transformation before applying z-score method.
-#'
-#' This is based on Cousineau, D., & Chartier, S. (2010), which is said to be
-#' suitable for reaction time data.
-#'
-#' @param x A vector of input reaction time data.
-#' @param threshold The threshold for determining whether a value is outlier or
-#'   not. Default is set at 2.5, which is best sample size dependent.
-#' @return A logical vector of the detected outliers.
-#' @keywords internal
-check_outliers_rt <- function(x, threshold = 2.5) {
-  z_scores <- scale(
-    scale(x, min(x, na.rm = TRUE), diff(range(x, na.rm = TRUE)))
-  )[, 1]
-  abs(z_scores) > threshold
-}
-
-#' Calculate threshold by staircase method
-#'
-#' Here we used the method suggested by Wetherill et al (1966).
-#'
-#' @param x The levels in data.
-#' @return The mean threshold.
-#' @keywords internal
-calc_staircase_wetherill <- function(x) {
-  find_reversals <- function(x) {
-    find_peaks_val <- function(x) {
-      mat <- pracma::findpeaks(x)
-      if (is.null(mat)) {
-        warn("Reversals not found from input", "no_reversals_found")
-        return(NA_real_)
-      }
-      mat[, 1]
-    }
-    list(
-      peaks = find_peaks_val(x),
-      valleys = -find_peaks_val(-x)
-    )
-  }
-  # remove repetitions in transformed method
-  x <- rle(x)$values
-  reversals <- find_reversals(x)
-  reversals |>
-    purrr::map(
-      # keep equal number of peaks and valleys
-      \(x) utils::tail(x, min(lengths(reversals)))
-    ) |>
-    purrr::list_c() |>
-    mean()
-}
diff --git a/R/utils-switch-congruence.R b/R/utils-switch-congruence.R
deleted file mode 100644
index b12f91e..0000000
--- a/R/utils-switch-congruence.R
+++ /dev/null
@@ -1,101 +0,0 @@
-#' Switch cost
-#'
-#' Utility function to calculate general and specific switch cost.
-#'
-#' @template common
-#' @param by The column name(s) in `data` used to be grouped by. If set to
-#'   `NULL`, all data will be treated as from one subject.
-#' @templateVar name_acc TRUE
-#' @templateVar name_rt TRUE
-#' @template names
-#' @param name_switch The column name of the `data` input whose values are
-#'   the switch type, in which is a `character` vector with at least `"switch"`
-#'   and `"repeat"` values.
-#' @keywords internal
-calc_switch_cost <- function(data, by, name_switch, name_rt, name_acc) {
-  data[[name_switch]] <- factor(data[[name_switch]], c("switch", "repeat"))
-  calc_cond_diff(
-    data,
-    by,
-    name_cond = name_switch,
-    name_diff_prefix = "switch_cost_",
-    name_acc = name_acc,
-    name_rt = name_rt
-  )
-}
-
-#' Congruence effect
-#'
-#' Utility function to calculate congruence effect sizes.
-#'
-#' @template common
-#' @param by The column name(s) in `data` used to be grouped by. If set to
-#'   `NULL`, all data will be treated as from one subject.
-#' @templateVar name_acc TRUE
-#' @templateVar name_rt TRUE
-#' @template names
-#' @param name_cong The column name of the `data` input whose values are the
-#'   congruence information, in which is a `character` vector with "incongruent
-#'   condition" (label: `"inc"`) and "congruent condition" (label: `"con"`). It
-#'   will be coerced as a `factor` vector with these two levels.
-#' @return A [tibble][tibble::tibble-package] contains congruence effect results
-#'   on accuracy and response time.
-#' @keywords internal
-calc_cong_eff <- function(data, by, name_cong, name_acc, name_rt) {
-  data[[name_cong]] <- factor(data[[name_cong]], c("inc", "con"))
-  calc_cond_diff(
-    data,
-    by,
-    name_cond = name_cong,
-    name_diff_prefix = "cong_eff_",
-    name_acc = name_acc,
-    name_rt = name_rt
-  )
-}
-
-calc_cond_diff <- function(data, by, name_acc, name_rt,
-                           name_cond, name_diff_prefix) {
-  conds <- levels(data[[name_cond]])
-  index_each_cond <- data |>
-    calc_spd_acc(
-      by = c(by, name_cond),
-      name_acc = name_acc,
-      name_rt = name_rt
-    ) |>
-    complete(.data[[name_cond]]) |>
-    select(all_of(c(by, name_cond, "pc", "mrt", "ies", "rcs", "lisas")))
-  index_each_cond |>
-    pivot_longer(
-      cols = -any_of(c(by, name_cond)),
-      names_to = "index_name",
-      values_to = "score"
-    ) |>
-    pivot_wider(
-      names_from = all_of(name_cond),
-      values_from = "score"
-    ) |>
-    mutate(
-      diff = .data[[conds[[1]]]] - .data[[conds[[2]]]],
-      .keep = "unused"
-    ) |>
-    # make sure larger values correspond to larger switch cost
-    mutate(
-      diff = if_else(
-        .data$index_name %in% c("pc", "rcs"),
-        -diff, diff
-      )
-    ) |>
-    pivot_wider(
-      names_from = "index_name",
-      values_from = "diff",
-      names_prefix = name_diff_prefix
-    ) |>
-    merge(
-      index_each_cond |>
-        pivot_wider(
-          names_from = all_of(name_cond),
-          values_from = -any_of(c(by, name_cond))
-        ),
-      by = by
-    )
-}
diff --git a/R/utils-speed-accuracy.R b/R/utils.R
similarity index 63%
rename from R/utils-speed-accuracy.R
rename to R/utils.R
index 5a5d23c..8a50440 100644
--- a/R/utils-speed-accuracy.R
+++ b/R/utils.R
@@ -129,3 +129,93 @@ calc_sdt <- function(data, by = NULL, name_acc = "acc", name_type = "type") {
       omissions = .data$e_s
     )
 }
+
+#' Calculate threshold by staircase method
+#'
+#' Here we used the method suggested by Wetherill et al (1966).
+#'
+#' @param x The levels in data.
+#' @return The mean threshold.
+#' @keywords internal
+calc_staircase_wetherill <- function(x) {
+  find_reversals <- function(x) {
+    find_peaks_val <- function(x) {
+      mat <- pracma::findpeaks(x)
+      if (is.null(mat)) {
+        warn("Reversals not found from input", "no_reversals_found")
+        return(NA_real_)
+      }
+      mat[, 1]
+    }
+    list(
+      peaks = find_peaks_val(x),
+      valleys = -find_peaks_val(-x)
+    )
+  }
+  # remove repetitions in transformed method
+  x <- rle(x)$values
+  reversals <- find_reversals(x)
+  reversals |>
+    purrr::map(
+      # keep equal number of peaks and valleys
+      \(x) utils::tail(x, min(lengths(reversals)))
+    ) |>
+    purrr::list_c() |>
+    mean()
+}
+
+#' Convert character responses
+#'
+#' Simple function converts character correctness to numeric one.
+#'
+#' @param x The character vector to be parsed.
+#' @param delim Delimiter used to join correctness when forming the character.
+#'   Usually is hyphen (i.e., `"-"`), which is the default.
+#' @param convert_numeric A logical value indicating if the values should be
+#'   converted to `numeric` ones.
+#' @return A list of the parsed result, the same length as the input vector.
+#' @keywords internal
+parse_char_resp <- function(x, delim = "-", convert_numeric = TRUE) {
+  parsed <- stringr::str_split(x, delim)
+  if (convert_numeric) {
+    parsed <- purrr::map(parsed, as.numeric)
+  }
+  parsed
+}
+
+
+#' Update settings with option settings
+#'
+#' Options are set in list can be tricky to update. This function makes partly
+#' adding custom options work.
+#'
+#' @param origin The original settings.
+#' @param updates The updates to settings
+#' @return An update list of settings.
+#' @keywords internal
+update_settings <- function(origin, updates) {
+  if (is.null(updates)) {
+    return(origin)
+  }
+  purrr::imap(origin, ~ updates[[.y]] %||% .x)
+}
+
+#' Outliers Detection for response time data
+#'
+#' This method is also called "transform" method, because it does a
+#' transformation before applying z-score method.
+#'
+#' This is based on Cousineau, D., & Chartier, S. (2010), which is said to be
+#' suitable for reaction time data.
+#'
+#' @param x A vector of input reaction time data.
+#' @param threshold The threshold for determining whether a value is outlier or
+#'   not. Default is set at 2.5, which is best sample size dependent.
+#' @return A logical vector of the detected outliers.
+#' @keywords internal
+check_outliers_rt <- function(x, threshold = 2.5) {
+  z_scores <- scale(
+    scale(x, min(x, na.rm = TRUE), diff(range(x, na.rm = TRUE)))
+  )[, 1]
+  abs(z_scores) > threshold
+}
diff --git a/R/utils_numerosity.R b/R/utils_numerosity.R
deleted file mode 100644
index 1085d2e..0000000
--- a/R/utils_numerosity.R
+++ /dev/null
@@ -1,56 +0,0 @@
-#' Fit a Simple Numerosity Model
-#'
-#' This model assumes the distribution of mental representation for a given
-#' number/count k is N(k, (w * k) ^ 2).
-#'
-#' @template common
-#' @param name_bigset,name_smallset Variable name in `data` indicates bigger and
-#'   smaller set.
-#' @param name_acc Variable name in `data` indicates user's response is correct
-#'   or not.
-#' @param n_fit Number of fits to try to find the best estimate.
-#' @param seed Random seed. Default is 1 so that results can be reproduced.
-#' @return A [list()] with structure the same as [optim()].
-#' @export
-fit_numerosity <- function(data, name_bigset, name_smallset, name_acc,
-                           n_fit = 5, seed = 1) {
-  set.seed(seed)
-  b <- data[[name_bigset]]
-  s <- data[[name_smallset]]
-  acc <- data[[name_acc]]
-
-  min_objective <- Inf
-  best_fit <- NULL
-  for (j in seq_len(n_fit)) {
-    repeat {
-      init <- c(w = stats::runif(1))
-      if (ll_numerosity(init, b, s, acc) < 1e6) {
-        break
-      }
-    }
-    fit <- stats::optim(
-      init, ll_numerosity,
-      method = "L-BFGS-B",
-      b = b, s = s, acc = acc,
-      lower = 0
-    )
-    if (fit[["value"]] < min_objective) {
-      best_fit <- fit
-    }
-  }
-  best_fit
-}
-
-ll_numerosity <- function(pars, b, s, acc) {
-  means <- b - s
-  sds <- pars["w"]^2 * (b^2 + s^2)
-
-  # incorrect means the mental representation is less than 0, so lower tail
-  dens <- ifelse(
-    !acc,
-    stats::pnorm(0, means, sds, lower.tail = TRUE, log.p = TRUE),
-    stats::pnorm(0, means, sds, lower.tail = FALSE, log.p = TRUE)
-  )
-
-  return(ifelse(any(!is.finite(dens)), 1e6, -sum(dens)))
-}
diff --git a/man/calc_cong_eff.Rd b/man/calc_cong_eff.Rd
index 1879f87..35c37e9 100644
--- a/man/calc_cong_eff.Rd
+++ b/man/calc_cong_eff.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-switch-congruence.R
+% Please edit documentation in R/switch-congruence.R
 \name{calc_cong_eff}
 \alias{calc_cong_eff}
 \title{Congruence effect}
diff --git a/man/calc_sdt.Rd b/man/calc_sdt.Rd
index 6d75d9c..b8daee7 100644
--- a/man/calc_sdt.Rd
+++ b/man/calc_sdt.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-speed-accuracy.R
+% Please edit documentation in R/utils.R
 \name{calc_sdt}
 \alias{calc_sdt}
 \title{Signal Detection Theory}
diff --git a/man/calc_spd_acc.Rd b/man/calc_spd_acc.Rd
index e8dbc85..f5de041 100644
--- a/man/calc_spd_acc.Rd
+++ b/man/calc_spd_acc.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-speed-accuracy.R
+% Please edit documentation in R/utils.R
 \name{calc_spd_acc}
 \alias{calc_spd_acc}
 \title{Calculate basic speed and accuracy scores}
diff --git a/man/calc_staircase_wetherill.Rd b/man/calc_staircase_wetherill.Rd
index a15ade6..606898c 100644
--- a/man/calc_staircase_wetherill.Rd
+++ b/man/calc_staircase_wetherill.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-common.R
+% Please edit documentation in R/utils.R
 \name{calc_staircase_wetherill}
 \alias{calc_staircase_wetherill}
 \title{Calculate threshold by staircase method}
diff --git a/man/calc_switch_cost.Rd b/man/calc_switch_cost.Rd
index 53216de..1773be7 100644
--- a/man/calc_switch_cost.Rd
+++ b/man/calc_switch_cost.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-switch-congruence.R
+% Please edit documentation in R/switch-congruence.R
 \name{calc_switch_cost}
 \alias{calc_switch_cost}
 \title{Switch cost}
diff --git a/man/check_outliers_rt.Rd b/man/check_outliers_rt.Rd
index 1f8c77a..4aeaed3 100644
--- a/man/check_outliers_rt.Rd
+++ b/man/check_outliers_rt.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-common.R
+% Please edit documentation in R/utils.R
 \name{check_outliers_rt}
 \alias{check_outliers_rt}
 \title{Outliers Detection for response time data}
diff --git a/man/fit_numerosity.Rd b/man/fit_numerosity.Rd
index 035d37f..515826e 100644
--- a/man/fit_numerosity.Rd
+++ b/man/fit_numerosity.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils_numerosity.R
+% Please edit documentation in R/nsymncmp.R
 \name{fit_numerosity}
 \alias{fit_numerosity}
 \title{Fit a Simple Numerosity Model}
diff --git a/man/parse_char_resp.Rd b/man/parse_char_resp.Rd
index 38dca80..8206564 100644
--- a/man/parse_char_resp.Rd
+++ b/man/parse_char_resp.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-common.R
+% Please edit documentation in R/utils.R
 \name{parse_char_resp}
 \alias{parse_char_resp}
 \title{Convert character responses}
diff --git a/man/update_settings.Rd b/man/update_settings.Rd
index 8050663..da6a7b4 100644
--- a/man/update_settings.Rd
+++ b/man/update_settings.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-common.R
+% Please edit documentation in R/utils.R
 \name{update_settings}
 \alias{update_settings}
 \title{Update settings with option settings}

From cb8934181aafec8dde8d772fd0da9eab722a673d Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 31 Oct 2023 11:46:04 +0800
Subject: [PATCH 04/12] Let fit_numerosity find initial at most 10 times

The old `repeat` loop could get stuck forever when no usable initial value
exists.

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 R/nsymncmp.R                   | 13 +++++++++++--
 tests/testthat/test-nsymncmp.R | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/R/nsymncmp.R b/R/nsymncmp.R
index 59878cf..025d9fc 100644
--- a/R/nsymncmp.R
+++ b/R/nsymncmp.R
@@ -82,12 +82,21 @@ fit_numerosity <- function(data, name_bigset, name_smallset, name_acc,
   min_objective <- Inf
   best_fit <- NULL
   for (j in seq_len(n_fit)) {
-    repeat {
+    # try 10 times to find a good initial value
+    for (i in seq_len(10)) {
       init <- c(w = stats::runif(1))
-      if (ll_numerosity(init, b, s, acc) < 1e6) {
+      init_objective <- ll_numerosity(init, b, s, acc)
+      if (init_objective < 1e6) {
         break
       }
     }
+    if (init_objective >= 1e6) {
+      warn(
+        "Cannot find a good initial value after 10 tries.",
+        "no_good_init"
+      )
+      return(list(par = c(w = NA_real_), convergence = 1))
+    }
     fit <- stats::optim(
       init, ll_numerosity,
       method = "L-BFGS-B",
diff --git a/tests/testthat/test-nsymncmp.R b/tests/testthat/test-nsymncmp.R
index 582e609..04b5fbb 100644
--- a/tests/testthat/test-nsymncmp.R
+++ b/tests/testthat/test-nsymncmp.R
@@ -52,3 +52,18 @@ test_that("Warning if not converged", {
   nsymncmp(data) |>
     expect_warning(class = "fit_not_converge")
 })
+
+test_that("Warn if no initial values found", {
+  data <- data.frame(
+    b = rep(0, 10),
+    s = rep(0, 10),
+    acc = rep(1, 10)
+  )
+  fit_numerosity(
+    data,
+    name_bigset = "b",
+    name_smallset = "s",
+    name_acc = "acc"
+  ) |>
+    expect_warning(class = "no_good_init")
+})

From ec85135c394a0b0ea499ec70696553efde544cd9 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 31 Oct 2023 11:46:32 +0800
Subject: [PATCH 05/12] Return w value even if not converged

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 R/nsymncmp.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/nsymncmp.R b/R/nsymncmp.R
index 025d9fc..a7a49f1 100644
--- a/R/nsymncmp.R
+++ b/R/nsymncmp.R
@@ -53,7 +53,6 @@ calc_numerosity <- function(data, name_bigset, name_smallset, name_acc) {
       "Cannot find fit after the max number of fitting.",
       "fit_not_converge"
     )
-    pars <- replace(pars, TRUE, NA_real_)
   }
   tibble::as_tibble_row(pars)
 }

From 137d2d5ebfb12ac03e9ff6314cdee6add18e50b7 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 31 Oct 2023 11:48:26 +0800
Subject: [PATCH 06/12] Update changelog

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 NEWS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 584b7da..34698c8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # preproc.iquizoo (development version)
 
+## Bug Fixes
+
+* Fixed an edge case when `fit_numerosity()` will stuck in infinite loop.
+
 # preproc.iquizoo 2.5.2
 
 * Enhance code quality of internal functions.

From 5e9d8cd47caca773acb75cb2765ec1ccb099c357 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 31 Oct 2023 15:00:34 +0800
Subject: [PATCH 07/12] Use parallel testing

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 204f7f9..c2edb13 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,6 +31,7 @@ Suggests:
     testthat (>= 3.0.0),
     withr
 Config/testthat/edition: 3
+Config/testthat/parallel: true
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3

From d2679c115884b822fd89efa4a45d114c0d8185eb Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 31 Oct 2023 15:02:11 +0800
Subject: [PATCH 08/12] Fix test glitches

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 tests/testthat/test-calc_staircase_wetherill.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test-calc_staircase_wetherill.R b/tests/testthat/test-calc_staircase_wetherill.R
index c8f1c38..1139fe6 100644
--- a/tests/testthat/test-calc_staircase_wetherill.R
+++ b/tests/testthat/test-calc_staircase_wetherill.R
@@ -8,5 +8,6 @@ test_that("Correctly find all peaks and valleys", {
 test_that("Return NA if no peaks or valleys", {
   is.na(calc_staircase_wetherill(c(2:5))) |>
     expect_true() |>
+    expect_warning(class = "no_reversals_found") |>
     expect_warning(class = "no_reversals_found")
 })

From b440a03facac53d8f7211a997c068977b1257261 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 28 Nov 2023 17:43:41 +0800
Subject: [PATCH 09/12] Accept function from tarflow

See https://github.com/psychelzh/tarflow.iquizoo/issues/58 for details

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 NAMESPACE                        |   2 +
 NEWS.md                          |   4 +
 R/preproc.R                      | 122 ++++++++++++++++++++++++
 man/preproc_data.Rd              |  49 ++++++++++
 man/wrangle_data.Rd              |  22 +++++
 tests/testthat/_snaps/preproc.md | 155 +++++++++++++++++++++++++++++++
 tests/testthat/helper-preproc.R  |   8 ++
 tests/testthat/test-preproc.R    |  79 ++++++++++++++++
 8 files changed, 441 insertions(+)
 create mode 100644 R/preproc.R
 create mode 100644 man/preproc_data.Rd
 create mode 100644 man/wrangle_data.Rd
 create mode 100644 tests/testthat/_snaps/preproc.md
 create mode 100644 tests/testthat/helper-preproc.R
 create mode 100644 tests/testthat/test-preproc.R

diff --git a/NAMESPACE b/NAMESPACE
index b964a4c..40a2f5e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,6 +26,7 @@ export(multisense)
 export(nback)
 export(nle)
 export(nsymncmp)
+export(preproc_data)
 export(racer)
 export(rapm)
 export(refframe)
@@ -39,6 +40,7 @@ export(sumweighted)
 export(switchcost)
 export(symncmp)
 export(synwin)
+export(wrangle_data)
 import(dplyr)
 import(rlang)
 import(tidyr)
diff --git a/NEWS.md b/NEWS.md
index 34698c8..d66b004 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # preproc.iquizoo (development version)
 
+## Breaking Changes
+
+* Added `wrangle_data()` and `preproc_data()` functions, which were previously in tarflow.iquizoo package.
+
 ## Bug Fixes
 
 * Fixed an edge case when `fit_numerosity()` will stuck in infinite loop.
diff --git a/R/preproc.R b/R/preproc.R
new file mode 100644
index 0000000..85efef5
--- /dev/null
+++ b/R/preproc.R
@@ -0,0 +1,122 @@
+#' Wrangle Raw Data
+#'
+#' Parse raw json string data as [data.frame()] and store them in a list column.
+#'
+#' @param data The raw data.
+#' @param name_raw_json The name of the column that stores the user's raw data
+#'   as a JSON string.
+#' @param name_raw_parsed The name used to store parsed data.
+#' @return A [data.frame] containing the parsed data.
+#' @export
+wrangle_data <- function(data,
+                         name_raw_json = "game_data",
+                         name_raw_parsed = "raw_parsed") {
+  data[[name_raw_parsed]] <- purrr::map(
+    data[[name_raw_json]],
+    parse_raw_json
+  )
+  select(data, !all_of(name_raw_json))
+}
+
+#' Feed Raw Data to Pre-processing
+#'
+#' Calculate indices using data typically returned by [wrangle_data()].
+#'
+#' @details
+#'
+#' Observations with empty raw data (an empty vector, e.g. `NULL`, in the
+#' `name_raw_parsed` column) are removed before calculating indices. If no
+#' observations are left after removal, a warning is signaled and `NULL` is
+#' returned.
+#'
+#' @param data A [data.frame] containing the raw data.
+#' @param fn This can be a function or formula. See [rlang::as_function()] for
+#'   more details.
+#' @param ... Additional arguments passed to `fn`.
+#' @param name_raw_parsed The name of the column that stores the user's raw
+#'   data as a list of [data.frame]s.
+#' @param pivot_results Whether to pivot the calculated indices. If `TRUE`, the
+#'   calculated indices are pivoted into long format, with each index name
+#'   stored in the column of `pivot_names_to`, and each index value stored in
+#'   the column of `pivot_values_to`. If `FALSE`, the calculated indices are
+#'   stored in the same format as returned by `fn`.
+#' @param pivot_names_to,pivot_values_to The column names used to store index
+#'   names and values if `pivot_results` is `TRUE`. See [tidyr::pivot_longer()]
+#'   for more details.
+#' @return A [data.frame] containing the calculated indices.
+#' @export
+preproc_data <- function(data, fn, ...,
+                         name_raw_parsed = "raw_parsed",
+                         pivot_results = TRUE,
+                         pivot_names_to = "index_name",
+                         pivot_values_to = "score") {
+  data <- filter(data, !purrr::map_lgl(.data[[name_raw_parsed]], is_empty))
+  if (nrow(data) == 0) {
+    warn("No non-empty data found.")
+    return()
+  }
+  fn <- as_function(fn)
+  results <- data |>
+    mutate(
+      calc_indices(.data[[name_raw_parsed]], fn, ...),
+      .keep = "unused"
+    )
+  if (pivot_results) {
+    results <- results |>
+      pivot_longer(
+        cols = !any_of(names(data)),
+        names_to = pivot_names_to,
+        values_to = pivot_values_to
+      ) |>
+      vctrs::vec_restore(data)
+  }
+  results
+}
+
+# helper functions
+parse_raw_json <- function(jstr) {
+  parsed <- tryCatch(
+    jsonlite::fromJSON(jstr),
+    error = function(cnd) {
+      warn(
+        c(
+          "Failed to parse json string with the following error:",
+          conditionMessage(cnd),
+          i = "Will parse it as `NULL` instead."
+        )
+      )
+      return()
+    }
+  )
+  if (is_empty(parsed)) {
+    return()
+  }
+  parsed |>
+    rename_with(tolower) |>
+    mutate(across(where(is.character), tolower))
+}
+
+calc_indices <- function(l, fn, ...) {
+  # temporary id column identifying which element of `l` each row came from
+  name_id <- ".id"
+  tryCatch(
+    bind_rows(l, .id = name_id),
+    error = function(cnd) {
+      warn(
+        c(
+          "Failed to bind raw data with the following error:",
+          conditionMessage(cnd),
+          i = "Will try using tidytable package."
+        )
+      )
+      check_installed(
+        "tidytable",
+        "because dplyr package fails to bind raw data."
+      )
+      tidytable::bind_rows(l, .id = name_id) |>
+        utils::type.convert(as.is = TRUE) # re-infer column types after bind
+    }
+  ) |>
+    fn(.by = name_id, ...) |>
+    select(!all_of(name_id))
+}
diff --git a/man/preproc_data.Rd b/man/preproc_data.Rd
new file mode 100644
index 0000000..6e07878
--- /dev/null
+++ b/man/preproc_data.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/preproc.R
+\name{preproc_data}
+\alias{preproc_data}
+\title{Feed Raw Data to Pre-processing}
+\usage{
+preproc_data(
+  data,
+  fn,
+  ...,
+  name_raw_parsed = "raw_parsed",
+  pivot_results = TRUE,
+  pivot_names_to = "index_name",
+  pivot_values_to = "score"
+)
+}
+\arguments{
+\item{data}{A \link{data.frame} containing the raw data.}
+
+\item{fn}{This can be a function or formula. See \code{\link[rlang:as_function]{rlang::as_function()}} for
+more details.}
+
+\item{...}{Additional arguments passed to \code{fn}.}
+
+\item{name_raw_parsed}{The name of the column that stores the user's raw
+data as a list of \link{data.frame}s.}
+
+\item{pivot_results}{Whether to pivot the calculated indices. If \code{TRUE}, the
+calculated indices are pivoted into long format, with each index name
+stored in the column of \code{pivot_names_to}, and each index value stored in
+the column of \code{pivot_values_to}. If \code{FALSE}, the calculated indices are
+stored in the same format as returned by \code{fn}.}
+
+\item{pivot_names_to, pivot_values_to}{The column names used to store index
+names and values if \code{pivot_results} is \code{TRUE}. See \code{\link[tidyr:pivot_longer]{tidyr::pivot_longer()}}
+for more details.}
+}
+\value{
+A \link{data.frame} containing the calculated indices.
+}
+\description{
+Calculate indices using data typically returned by \code{\link[=wrangle_data]{wrangle_data()}}.
+}
+\details{
+Observations with empty raw data (an empty vector, e.g. \code{NULL}, in the
+\code{name_raw_parsed} column) are removed before calculating indices. If no
+observations are left after removal, a warning is signaled and \code{NULL} is
+returned.
+}
diff --git a/man/wrangle_data.Rd b/man/wrangle_data.Rd
new file mode 100644
index 0000000..7d6f296
--- /dev/null
+++ b/man/wrangle_data.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/preproc.R
+\name{wrangle_data}
+\alias{wrangle_data}
+\title{Wrangle Raw Data}
+\usage{
+wrangle_data(data, name_raw_json = "game_data", name_raw_parsed = "raw_parsed")
+}
+\arguments{
+\item{data}{The raw data.}
+
+\item{name_raw_json}{The name of the column that stores the user's raw data
+as a JSON string.}
+
+\item{name_raw_parsed}{The name used to store parsed data.}
+}
+\value{
+A \link{data.frame} containing the parsed data.
+}
+\description{
+Parse raw json string data as \code{\link[=data.frame]{data.frame()}} and store them in a list column.
+}
diff --git a/tests/testthat/_snaps/preproc.md b/tests/testthat/_snaps/preproc.md
new file mode 100644
index 0000000..b4151e1
--- /dev/null
+++ b/tests/testthat/_snaps/preproc.md
@@ -0,0 +1,155 @@
+# Basic situation in `preproc_data()`
+
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["user_id", "index_name", "score"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["tbl_df", "tbl", "data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["nhit", "nhit"]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": ["NaN", 1]
+        }
+      ]
+    }
+
+---
+
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["user_id", "nhit"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["tbl_df", "tbl", "data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": ["NaN", 1]
+        }
+      ]
+    }
+
+# Deal with `NULL` in parsed data
+
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["user_id", "index_name", "score"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["tbl_df", "tbl", "data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 3]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["nhit", "nhit"]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": ["NaN", 1]
+        }
+      ]
+    }
+
+# Can deal with mismatch column types in raw data
+
+    {
+      "type": "list",
+      "attributes": {
+        "names": {
+          "type": "character",
+          "attributes": {},
+          "value": ["user_id", "index_name", "score"]
+        },
+        "row.names": {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2, 3]
+        },
+        "class": {
+          "type": "character",
+          "attributes": {},
+          "value": ["tbl_df", "tbl", "data.frame"]
+        }
+      },
+      "value": [
+        {
+          "type": "integer",
+          "attributes": {},
+          "value": [1, 2, 3]
+        },
+        {
+          "type": "character",
+          "attributes": {},
+          "value": ["nhit", "nhit", "nhit"]
+        },
+        {
+          "type": "double",
+          "attributes": {},
+          "value": ["NaN", 2, 3]
+        }
+      ]
+    }
+
diff --git a/tests/testthat/helper-preproc.R b/tests/testthat/helper-preproc.R
new file mode 100644
index 0000000..354db01
--- /dev/null
+++ b/tests/testthat/helper-preproc.R
@@ -0,0 +1,8 @@
+prep_fun <- function(data, .by = NULL) {
+  data |>
+    group_by(pick(all_of(.by))) |>
+    summarise(
+      nhit = mean(.data$nhit[.data$feedback == 1]),
+      .groups = "drop"
+    )
+}
diff --git a/tests/testthat/test-preproc.R b/tests/testthat/test-preproc.R
new file mode 100644
index 0000000..e5b08f6
--- /dev/null
+++ b/tests/testthat/test-preproc.R
@@ -0,0 +1,79 @@
+test_that("Basic situation for `wrangle_data()`", {
+  js_str <- r"([{"a": 1, "b": 2}])"
+  data <- tibble::tibble(game_data = js_str)
+  wrangle_data(data) |>
+    expect_silent() |>
+    expect_named("raw_parsed") |>
+    purrr::pluck("raw_parsed", 1) |>
+    expect_identical(jsonlite::fromJSON(js_str))
+  wrangle_data(data, name_raw_parsed = "parsed") |>
+    expect_silent() |>
+    expect_named("parsed")
+})
+
+test_that("Can deal with invalid or empty json", {
+  data_case_invalid <- data.frame(game_data = "[1")
+  wrangle_data(data_case_invalid) |>
+    expect_warning("Failed to parse json string") |>
+    purrr::pluck("raw_parsed", 1) |>
+    expect_null()
+  data_case_empty <- data.frame(game_data = c("[]", "{}"))
+  wrangle_data(data_case_empty) |>
+    purrr::pluck("raw_parsed") |>
+    purrr::walk(expect_null)
+})
+
+test_that("Change names and values to lowercase", {
+  js_str <- r"([{"A": "A"}, {"A": "B"}])"
+  data <- tibble::tibble(game_data = js_str)
+  wrangle_data(data) |>
+    expect_silent() |>
+    purrr::pluck("raw_parsed", 1) |>
+    expect_identical(data.frame(a = c("a", "b")))
+})
+
+test_that("Basic situation in `preproc_data()`", {
+  data <- tibble::tibble(
+    user_id = 1:2,
+    raw_parsed = list(
+      data.frame(nhit = 1, feedback = 0),
+      data.frame(nhit = 1, feedback = 1)
+    )
+  )
+  preproc_data(data, prep_fun) |>
+    expect_snapshot_value(style = "json2")
+  preproc_data(data, prep_fun, pivot_results = FALSE) |>
+    expect_snapshot_value(style = "json2")
+})
+
+test_that("Deal with `NULL` in parsed data", {
+  tibble::tibble(raw_parsed = list(NULL)) |>
+    preproc_data(prep_fun) |>
+    expect_null() |>
+    expect_warning("No non-empty data found.")
+  tibble::tibble(
+    user_id = 1:3,
+    raw_parsed = list(
+      data.frame(nhit = 1, feedback = 0),
+      NULL,
+      data.frame(nhit = 1, feedback = 1)
+    )
+  ) |>
+    preproc_data(prep_fun) |>
+    expect_snapshot_value(style = "json2")
+})
+
+test_that("Can deal with mismatch column types in raw data", {
+  skip_if_not_installed("tidytable")
+  data <- tibble::tibble(
+    user_id = 1:3,
+    raw_parsed = list(
+      data.frame(nhit = 1, feedback = 0),
+      data.frame(nhit = 2, feedback = 1),
+      data.frame(nhit = "3", feedback = 1)
+    )
+  )
+  preproc_data(data, prep_fun) |>
+    expect_snapshot_value(style = "json2") |>
+    expect_warning("Failed to bind raw data")
+})

From eacbdc90b9d835768023db5948c3a3a8e90acd38 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Tue, 28 Nov 2023 18:03:43 +0800
Subject: [PATCH 10/12] Add more used packages

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 DESCRIPTION | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index c2edb13..d563142 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,6 +15,7 @@ Depends:
     R (>= 4.1.0)
 Imports:
     dplyr,
+    jsonlite,
     pracma,
     purrr,
     rlang (>= 0.1.2),
@@ -29,6 +30,7 @@ Suggests:
     readr,
     roxygen2,
     testthat (>= 3.0.0),
+    tidytable,
     withr
 Config/testthat/edition: 3
 Config/testthat/parallel: true

From f6f4398d8d06f4e00c0fbaec542f628df6102c22 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Wed, 29 Nov 2023 01:10:13 +0800
Subject: [PATCH 11/12] Increment version number to 2.6.0

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d563142..5f190cc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: preproc.iquizoo
 Title: Utility Functions for Data Processing of Iquizoo Games
-Version: 2.5.2.9000
+Version: 2.6.0
 Authors@R:
     person("Liang", "Zhang", , "psychelzh@outlook.com", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-9041-1150"))
diff --git a/NEWS.md b/NEWS.md
index d66b004..f020b80 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# preproc.iquizoo (development version)
+# preproc.iquizoo 2.6.0
 
 ## Breaking Changes
 

From bed6516b069f11a201f122bc6a2b7ba3908cdb64 Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Wed, 29 Nov 2023 01:22:07 +0800
Subject: [PATCH 12/12] Add reference structure

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 _pkgdown.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index c49d4fa..ec9c251 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,3 +1,16 @@
 url: https://psychelzh.github.io/preproc.iquizoo
 template:
   bootstrap: 5
+reference:
+  - title: "High-level API for data pre-processing"
+    desc: Typically you would like to call these two functions to invoke processing.
+    contents:
+    - wrangle_data
+    - preproc_data
+  - title: "Low-level data pre-processing functions"
+    desc: >
+      These functions are typically used in the `preproc_data` function,
+      separately for each task paradigm.
+    contents:
+    - -wrangle_data
+    - -preproc_data