a single \donttest{} is enough

sdcTools · May 17, 2024 · 9793865 · 9793865
1 parent d517632
commit 9793865
Showing 1 changed file with 50 additions and 51 deletions.
diff --git a/R/dUtility.R b/R/dUtility.R
@@ -5,11 +5,11 @@
 #'
 #' The standardised distances of the perturbed data values to the original ones
 #' are measured. The following measures are available:
-#' - `"IL1`: sum of absolute distances between original and perturbed variables 
+#' - `"IL1"`: sum of absolute distances between original and perturbed variables
 #' scaled by absolute values of the original variables
 #' - `"IL1s"`: measures the absolute distances between original
-#' and perturbed ones, scaled by the standard deviation of original variables times 
-#' the square root of `2`. 
+#' and perturbed ones, scaled by the standard deviation of original variables times
+#' the square root of `2`.
 #' - `"eigen"`: compares the eigenvalues of original and perturbed data
 #' - `"robeigen"`: compares robust eigenvalues of original and perturbed data
 #' @name dUtility
@@ -22,7 +22,7 @@
 #' @return data utility or modified entry for data utility in the [sdcMicroObj-class] object.
 #' @author Matthias Templ
 #' @seealso [dRisk()], [dRiskRMD()]
-#' @references for IL1 and IL1s: see Mateo-Sanz, Sebe, Domingo-Ferrer. 
+#' @references for IL1 and IL1s: see Mateo-Sanz, Sebe, Domingo-Ferrer.
 #' Outlier Protection in Continuous Microdata Masking.
 #' International Workshop on Privacy in Statistical Databases.
 #' PSD 2004: Privacy in Statistical Databases pp 201-215.
@@ -59,7 +59,6 @@
 #'   keyVars=c('urbrur','roof','walls','water','electcon','relat','sex'),
 #'   numVars=c('expend','income','savings'), w='sampling_weight')
 #' ## this is already done internally, so you don't need to run this:
-#' \donttest{
 #' sdc <- dUtility(sdc)
 #' }
 dUtility <- function(obj, ...) {
@@ -96,7 +95,7 @@ dUtilityWORK <- function(x, xm, method = "IL1s") {
   if (method == "IL1") {
     a <- x
     for (i in 1:dim(x)[2]) {
-      a[[i]] <- 100 * abs(x[[i]] - xm[[i]]) / (abs(x[[i]])) 
+      a[[i]] <- 100 * abs(x[[i]] - xm[[i]]) / (abs(x[[i]]))
     }
     a[a == "Inf"] <- NA
     infLoss1 <- sum(a, na.rm = TRUE)
@@ -140,30 +139,30 @@ dUtilityWORK <- function(x, xm, method = "IL1s") {
 
 
 #' Additional Information-Loss measures
-#' 
+#'
 #' Measures [IL_correl()] and [IL_variables()] were proposed by Andrzej Mlodak and are (theoretically) bounded between `0` and `1`.
-#' 
+#'
 #' - `IL_correl()`: is an information-loss measure that can be applied to common numerically scaled variables in `x` and `xm`. It is based
-#' on diagonal entries of inverse correlation matrices in the original and perturbed data. 
-#' - `IL_variables()`: for common-variables in `x` and `xm` the individual distance-functions depend on the class of the variable; 
+#' on diagonal entries of inverse correlation matrices in the original and perturbed data.
+#' - `IL_variables()`: for common-variables in `x` and `xm` the individual distance-functions depend on the class of the variable;
 #' specifically these functions are different for numeric variables, ordered-factors and character/factor variables. The individual distances
-#' are summed up and scaled by `n * m` with `n` being the number of records and `m` being the number of (common) variables. 
+#' are summed up and scaled by `n * m` with `n` being the number of records and `m` being the number of (common) variables.
 #' @author Bernhard Meindl <bernhard.meindl@@statistik.gv.at>
 #' @details Details can be found in the references below
-#' 
-#' The implementation of [IL_correl()] differs slightly with the original proposition from Mlodak, A. (2020) as 
+#'
+#' The implementation of [IL_correl()] differs slightly from the original proposal in Mlodak, A. (2020) as
 #' the constant multiplier was changed to `1 / sqrt(2)` instead of `1/2` for better efficiency and interpretability
 #' of the measure.
-#' 
+#'
 #' @param x an object coercible to a `data.frame` representing the original dataset
 #' @param xm an object coercible to a `data.frame` representing the perturbed, modified dataset
 #' @param digits number of digits used for rounding when displaying results
 #' @param ... additional parameter for print-methods; currently ignored
 #' @references Mlodak, A. (2020). Information loss resulting from statistical disclosure control of output data,
 #' Wiadomosci Statystyczne. The Polish Statistician, 2020, 65(9), 7-27, DOI: 10.5604/01.3001.0014.4121
-#' 
-#' Mlodak, A. (2019). Using the Complex Measure in an Assessment of the Information Loss Due to the Microdata Disclosure Control, 
-#' Przegląd Statystyczny, 2019, 66(1), 7-26, 
+#'
+#' Mlodak, A. (2019). Using the Complex Measure in an Assessment of the Information Loss Due to the Microdata Disclosure Control,
+#' Przegląd Statystyczny, 2019, 66(1), 7-26,
 #' DOI: 10.5604/01.3001.0013.8285
 #' @return the corresponding information-loss measure
 #' @export
@@ -173,8 +172,8 @@ dUtilityWORK <- function(x, xm, method = "IL1s") {
 #' data("Tarragona", package = "sdcMicro")
 #' res1 <- addNoise(obj = Tarragona, variables = colnames(Tarragona), noise = 100)
 #' IL_correl(x = as.data.frame(res1$x), xm = as.data.frame(res1$xm))
-#' 
-#' res2 <- addNoise(obj = Tarragona, variables = colnames(Tarragona), noise = 25) 
+#'
+#' res2 <- addNoise(obj = Tarragona, variables = colnames(Tarragona), noise = 25)
 #' IL_correl(x = as.data.frame(res2$x), xm = as.data.frame(res2$xm))
 IL_correl <- function(x, xm) {
   # compute inverse of correlation matrix and return diagonal entries
@@ -185,44 +184,44 @@ IL_correl <- function(x, xm) {
     }
     diag(inv_mat)
   }
-  
+
   stopifnot(is.data.frame(x), is.data.frame(xm))
-  
+
   x <- data.table::setDT(x)
   xm <- data.table::setDT(xm)
-  
+
   cn <- intersect(names(x), names(xm))
   if (length(cn) == 0) {
     stop("please supply data.frames with overlapping variable names", call. = FALSE)
   }
-  
+
   # compute overlapping numeric variables
   numerics_x <- sort(cn[sapply(cn, function(k) {
     is.numeric(x[[k]])
   })])
-  
+
   numerics_y <- sort(cn[sapply(cn, function(k) {
     is.numeric(xm[[k]])
   })])
-  
+
   stopifnot(all.equal(numerics_x, numerics_y))
-  
+
   df_o <- x[, numerics_x, with = FALSE, drop = FALSE]
   df_p <- xm[, numerics_y, with = FALSE, drop = FALSE]
-  
+
   # compute diagonal entries
   diags_o <- .get_inverse_diags(df = df_o)
   diags_p <- .get_inverse_diags(df = df_p)
-  
+
   # compute inner denominator in formula
-  denom_o <- sqrt(sum(diags_o^2));  
+  denom_o <- sqrt(sum(diags_o^2));
   denom_p <- sqrt(sum(diags_p^2))
-  
+
   # compute lambda
-  res <- sqrt(sum(((diags_o / denom_o) - (diags_p / denom_p))^2)) 
+  res <- sqrt(sum(((diags_o / denom_o) - (diags_p / denom_p))^2))
   #res <- 0.5 * res # original proposal
   res <- res  * (1 / sqrt(2))
-  
+
   attr(res, "nr_vars") <- ncol(df_o)
   attr(res, "n_x") <- nrow(x)
   attr(res, "n_y") <- nrow(xm)
@@ -244,7 +243,7 @@ print.il_correl <- function(x, digits = 3, ...) {
 #' @export
 #' @rdname il_additional
 #' @examples
-#' 
+#'
 #' # creating test-inputs
 #' n <- 150
 #' x <- xm <- data.frame(
@@ -266,14 +265,14 @@ IL_variables <- function(x, xm) {
       stop("`p` must be an (ordered) factor or character", call. = FALSE)
     }
     stopifnot(length(x) == length(p))
-    
+
     if (is.factor(x)) x <- as.character(x)
     if (is.factor(p)) p <- as.character(p)
-    
+
     dists <- rep(1, length(x))
     na_x <- is.na(x)
     na_p <- is.na(p)
-    
+
     dists <- rep(1, length(x))
     dists[x == p] <- 0
     dists[na_x | na_p] <- 1
@@ -284,9 +283,9 @@ IL_variables <- function(x, xm) {
     stopifnot(is.factor(x)); stopifnot("ordered" %in% class(x))
     stopifnot(is.factor(p)); stopifnot("ordered" %in% class(p))
     stopifnot(all.equal(levels(x), levels(p)))
-    
+
     max_diff <- length(levels(x)) - 1
-    
+
     # single NA -> max_diff
     # both NA -> 0
     d <- abs(as.numeric(x) - as.numeric(p))
@@ -297,43 +296,43 @@ IL_variables <- function(x, xm) {
   dist_num <- function(x, p) {
     stopifnot(is.numeric(x))
     stopifnot(is.numeric(p))
-    
+
     # max. possible difference
     max_diff <- max(abs(range(p, na.rm = TRUE) - rev(range(x, na.rm = TRUE))))
-    
+
     v <- abs(x - p)
     v[is.na(x) | is.na(p)] <- max_diff
     v[is.na(x) & is.na(p)] <- 0
     sum((2 / pi) * atan(v))
   }
-  
+
   stopifnot(is.data.frame(x), is.data.frame(xm))
   x <- data.table::setDT(x)
   xm <- data.table::setDT(xm)
   stopifnot(nrow(x) == nrow(xm))
-  
+
   cn <- intersect(names(x), names(xm))
   if (length(cn) == 0) {
     stop("please supply data.frames with overlapping variable names", call. = FALSE)
   }
-  
+
   cl_o <- sapply(cn, function(v) {
     class(x[[v]])
   })
   cl_p <- sapply(cn, function(v) {
     class(xm[[v]])
   })
-  
+
   # classes of common-variables must match
   if(!all.equal(cl_o, cl_p)) {
     stop("classes of common-variables in `x` and `xm` must be identical", call. = FALSE)
   }
-  
+
   inp <- data.frame(v = cn, type = "nominal")
   inp[sapply(cn, function(v) inherits(x[[v]], "numeric")), "type"] <- "numeric"
-  inp[sapply(cn, function(v) inherits(x[[v]], "ordered")), "type"] <- "ordered"  
-  
-  # compute distances  
+  inp[sapply(cn, function(v) inherits(x[[v]], "ordered")), "type"] <- "ordered"
+
+  # compute distances
   inp$dists <- sapply(1:nrow(inp), function(y) {
     v <- inp$v[y]
     vals_o <- x[[v]]
@@ -346,14 +345,14 @@ IL_variables <- function(x, xm) {
       dist_nom(x = vals_o, p = vals_p)
     }
   })
-  
+
   # aggregate distances to compute final utility measure
   lambda <- sum(inp$dists) / (length(cn) * nrow(x))
-  
+
   # individual contributions
   indiv_contr <- inp$dists / nrow(x)
   names(indiv_contr) <- inp$v
-  
+
   attr(lambda, "indiv_distances") <- indiv_contr
   attr(lambda, "n") <- nrow(x)
   class(lambda) <- "il_variables"