Skip to content

Commit

Permalink
start working on #349
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhard-da committed May 17, 2024
1 parent 9793865 commit faab16f
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 28 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ LinkingTo: Rcpp
Depends:
R (>= 2.10)
Suggests:
laeken,testthat
laeken,
parallel,
testthat
Imports:
utils,
stats,
Expand Down
66 changes: 46 additions & 20 deletions R/localSuppression.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@
#' in the data.frame method. This means that k-anonymity is provided within each category
#' of the specified variable.}
#' \item{alpha: }{numeric value between 0 and 1 specifying how much keys that
#' contain missing values (\code{NAs}) should contribute to the calculation
#' of \code{fk} and \code{Fk}. For the default value of \code{1}, nothing changes with
#' respect to the implementation in prior versions. Each \emph{wildcard-match} would
#' be counted while for \code{alpha=0} keys with missing values would be basically ignored.
#' Used in the data-frame method only because in the method for \code{\link{sdcMicroObj-class}}-objects,
#' this value is extracted from slot \code{options}.}
#' contain missing values (`NA`s) should contribute to the calculation
#' of `fk` and `Fk`. For the default value of `1`, nothing changes with
#' respect to the implementation in prior versions: each *wildcard-match* is
#' counted, while for `alpha=0` keys with missing values are basically ignored.
#' Used in the `data.frame` method only, because in the method for \code{\link{sdcMicroObj-class}}-objects
#' this value is extracted from slot `options`.}
#' \item{nc: }{maximum number of cores used when computations are performed by strata. This parameter
#' defaults to `1` (no parallelisation), is capped at the number of detected cores minus one and is ignored on Windows platforms.}
#' }
#' @return Manipulated data set with suppressions that has k-anonymity with
#' respect to specified key-variables or the manipulated data stored in the
Expand Down Expand Up @@ -85,7 +87,7 @@
#' w = "sampling_weight",
#' strataVar = "ageG"
#' )
#' sdc <- localSuppression(sdc)
#' sdc <- localSuppression(sdc, nc = 1)
#'
#' ## it is also possible to provide k-anonymity for subsets of key-variables
#' ## with different parameter k!
Expand Down Expand Up @@ -124,7 +126,7 @@ setGeneric("localSuppressionX", function(obj, k = 2, importance = NULL, combs =
setMethod(
f = "localSuppressionX",
signature = c("sdcMicroObj"),
definition = function(obj, k = 2, importance = NULL, combs = NULL) {
definition = function(obj, k = 2, importance = NULL, combs = NULL, nc = 1) {
obj <- nextSdcObj(obj)
### get data from manipKeyVars
df <- as.data.frame(get.sdcMicroObj(obj, type = "manipKeyVars"))
Expand All @@ -145,7 +147,8 @@ setMethod(
k = k,
combs = combs,
importance = importance,
alpha = alpha)
alpha = alpha,
nc = nc)

# create final output
obj <- set.sdcMicroObj(obj, type = "manipKeyVars", input = list(ls$xAnon))
Expand Down Expand Up @@ -187,7 +190,8 @@ setMethod(
combs = NULL,
keyVars,
strataVars = NULL,
alpha = 1) {
alpha = 1,
nc = 1) {

localSuppressionWORK(
x = obj,
Expand All @@ -196,7 +200,8 @@ setMethod(
strataVars = strataVars,
importance = importance,
combs = combs,
alpha = alpha)
alpha = alpha,
nc = nc)
}
)

Expand Down Expand Up @@ -338,7 +343,7 @@ suppSubset <- function(x, k, importance, alpha) {
# Count how many entries of `x` are missing (NA).
# Returns a single integer; an input without any NA yields 0L.
sum_na <- function(x) {
  missing_idx <- which(is.na(x))
  length(missing_idx)
}
localSuppressionWORK <- function(x, keyVars, strataVars, k=2, combs, importance=NULL, alpha) {
localSuppressionWORK <- function(x, keyVars, strataVars, k=2, combs, importance=NULL, alpha, nc=1) {
# find a suppression pattern for a simple subset that is not stratified
# input: df=data.table with only keyVars
# k: parameter for k-anonymity (length 1)
Expand All @@ -360,6 +365,17 @@ localSuppressionWORK <- function(x, keyVars, strataVars, k=2, combs, importance=
strataVarsNum <- match(strataVars, colnames(x))
}

stopifnot(is.numeric(nc))
nc <- round(nc)[1]
stopifnot(nc >= 1)
# parallel::mclapply only works reliably on Linux/macOS, so on every
# other platform (e.g. Windows) we simply fall back to a single core
if (!tolower(Sys.info()["sysname"]) %in% c("linux", "darwin")) {
nc <- 1
} else {
nc <- max(1, min(nc, parallel::detectCores() - 1))
}

# checks and preparations if we apply localSuppression on
# subsets of key variables
if (!is.null(combs)) {
Expand Down Expand Up @@ -467,14 +483,24 @@ localSuppressionWORK <- function(x, keyVars, strataVars, k=2, combs, importance=
supps <- xAnon <- vector("list", length = length(spl))
totalSupps <- rep(NA, length(spl))
if (is.null(combs)) {
# todo: using parallel/mclapply?
for (i in seq_along(spl)) {
res <- suppSubset(
x = spl[[i]][, keyVars, with = FALSE],
k = k,
importance = importance,
alpha = alpha)
xAnon[[i]] <- res$xAnon
if (nc == 1) {
#message("running serially")
xAnon <- lapply(seq_len(length(spl)), function(x) {
suppSubset(
x = spl[[x]][, keyVars, with = FALSE],
k = k,
importance = importance,
alpha = alpha)$xAnon
})
} else {
#message("running in parallel using ", nc, " cores")
xAnon <- parallel::mclapply(seq_len(length(spl)), function(x) {
suppSubset(
x = spl[[x]][, keyVars, with = FALSE],
k = k,
importance = importance,
alpha = alpha)$xAnon
}, mc.cores = nc)
}
} else {
# local Suppression by strata and combination of subsets!
Expand Down
16 changes: 9 additions & 7 deletions man/localSuppression.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit faab16f

Please sign in to comment.