diff --git a/NEWS.md b/NEWS.md index 44565546d..d4c0c0e9a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # teal.data 0.6.0.9006 +### Enhancements + +- `datanames()` + - if `join_keys` are provided, the `datanames()` are now sorted in topological way (`Kahn` algorithm), + which means the parent dataset always precedes the child dataset. + - are extended by the parent dataset name, if one of the child dataset exist in `datanames()` and + the connection between child-parent is set through `join_keys` and `parent` exist in `teal_data` environment. + - do not allow to set a dataset name that do not exist in `teal_data` environment. + - `teal_data` no longer set default `datanames()` based on `join_keys` names - it uses only data names. + # teal.data 0.6.0 ### Enhancements diff --git a/R/join_keys.R b/R/join_keys.R index 47fb97836..1798a46af 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -142,6 +142,7 @@ join_keys.teal_data <- function(...) { #' join_keys(td) `join_keys<-.teal_data` <- function(x, value) { join_keys(x@join_keys) <- value + datanames(x) <- x@datanames # datanames fun manages some exceptions x } diff --git a/R/teal_data-class.R b/R/teal_data-class.R index 542ee76bb..3015797b9 100644 --- a/R/teal_data-class.R +++ b/R/teal_data-class.R @@ -60,7 +60,7 @@ setClass( new_teal_data <- function(data, code = character(0), join_keys = join_keys(), - datanames = union(names(data), names(join_keys))) { + datanames = names(data)) { checkmate::assert_list(data) checkmate::assert_class(join_keys, "join_keys") if (is.null(datanames)) datanames <- character(0) # todo: allow to specify @@ -82,6 +82,8 @@ new_teal_data <- function(data, new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv)) lockEnvironment(new_env, bindings = TRUE) + datanames <- .get_sorted_datanames(datanames = datanames, join_keys = join_keys, env = new_env) + methods::new( "teal_data", env = new_env, diff --git a/R/teal_data-datanames.R b/R/teal_data-datanames.R index 929f09eb1..d32727859 100644 --- a/R/teal_data-datanames.R +++ b/R/teal_data-datanames.R @@ -42,7 +42,7 @@ setMethod("datanames", signature = "qenv.error", definition = function(x) { setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-")) setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) { checkmate::assert_subset(value, names(x@env)) - x@datanames <- value + x@datanames <- .get_sorted_datanames(datanames = value, join_keys = x@join_keys, env = x@env) methods::validObject(x) x }) @@ -50,3 +50,19 @@ setMethod("datanames<-", signature = c("qenv.error", "character"), definition = methods::validObject(x) x }) + + +#' @keywords internal +.get_sorted_datanames <- function(datanames, join_keys, env) { + child_parent <- sapply( + datanames, + function(name) parent(join_keys, name), + USE.NAMES = TRUE, + simplify = FALSE + ) + + union( + intersect(unlist(topological_sort(child_parent)), ls(env)), + datanames + ) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index f53064cb2..6860ac2d2 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -3,6 +3,7 @@ CDISC Forkers Getter Hoffmann +Kahn ORCID Reproducibility formatters diff --git a/man/new_teal_data.Rd b/man/new_teal_data.Rd index 176a04ae5..4ddc43c95 100644 --- a/man/new_teal_data.Rd +++ b/man/new_teal_data.Rd @@ -8,7 +8,7 @@ new_teal_data( data, code = character(0), join_keys = join_keys(), - datanames = union(names(data), names(join_keys)) + datanames = names(data) ) } \arguments{ diff --git a/tests/testthat/test-datanames.R b/tests/testthat/test-datanames.R index 68966006f..e0c99bb63 100644 --- a/tests/testthat/test-datanames.R +++ b/tests/testthat/test-datanames.R @@ -41,3 +41,97 @@ testthat::test_that("datanames<- called on qenv.error does not change qenv.error qec <- qe testthat::expect_identical(qe, qec) }) + +# topological_order ---- + +testthat::test_that("datanames are set in topological order in constructor if join_keys are specified", { + data <- + teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) + testthat::expect_identical( + datanames(data), + c("a", "b") + ) +}) + +testthat::test_that("datanames return parent if in constructor it was provided in join_keys and exists in env", { + data <- + teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) + datanames(data) <- "b" + testthat::expect_identical( + datanames(data), + c("a", "b") + ) +}) + +testthat::test_that( + "datanames do not return parent if in constructor it was provided in join_keys but do not exists in env", + { + data <- + teal_data(b = data.frame(), join_keys = join_keys(join_key("a", "b", "id"))) + testthat::expect_identical( + datanames(data), + "b" + ) + } +) + +testthat::test_that("datanames return topological order of datasets once join_keys are specified", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + iris <- iris + ADSL <- teal.data::rADSL # nolint: object_name. + }) + datanames(data) <- c("ADTTE", "iris", "ADSL") + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + testthat::expect_identical( + datanames(data), + c("ADSL", "ADTTE", "iris") + ) +}) + +testthat::test_that("datanames return topological order of datasets after datanames are called after join_keys", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + iris <- iris + ADSL <- teal.data::rADSL # nolint: object_name. + }) + + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + datanames(data) <- c("ADTTE", "iris", "ADSL") + + testthat::expect_identical( + datanames(data), + c("ADSL", "ADTTE", "iris") + ) +}) + + +testthat::test_that("datanames return parent if join_keys were provided and parent exists in env", { + data <- within(teal_data(), { + ADTTE <- teal.data::rADTTE # nolint: object_name. + iris <- iris + ADSL <- teal.data::rADSL # nolint: object_name. + }) + + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + datanames(data) <- c("ADTTE", "iris") + + testthat::expect_identical( + datanames(data), + c("ADSL", "ADTTE", "iris") + ) +}) + +testthat::test_that("datanames do not return parent if join_keys were provided and parent did not exists in env", { + data <- teal_data( + ADTTE = teal.data::rADTTE, # nolint: object_name. + iris = iris + ) + + join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")] + + testthat::expect_identical( + datanames(data), + c("ADTTE", "iris") + ) +}) diff --git a/tests/testthat/test-teal_data.R b/tests/testthat/test-teal_data.R index 72c7e7e3b..8391f72e2 100644 --- a/tests/testthat/test-teal_data.R +++ b/tests/testthat/test-teal_data.R @@ -28,19 +28,25 @@ testthat::test_that("teal_data initializes teal_data object with @datanames take ) }) -testthat::test_that("teal_data initializes teal_data object with @datanames taken from passed join_keys", { - testthat::expect_identical( - teal_data(join_keys = join_keys(join_key("parent", "child", "id")))@datanames, - c("parent", "child") - ) -}) +testthat::test_that( + "teal_data initializes teal_data object without @datanames taken from join_keys if objects did not exist in env", + { + testthat::expect_identical( + datanames(teal_data(join_keys = join_keys(join_key("parent", "child", "id")))), + character(0) + ) + } +) -testthat::test_that("teal_data initializes teal_data object with @datanames taken from join_keys and passed objects", { - testthat::expect_identical( - teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))@datanames, - c("iris", "parent", "child") - ) -}) +testthat::test_that( + "teal_data initializes teal_data object with @datanames taken only from passed objects and not join_keys", + { + testthat::expect_identical( + datanames(teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))), + "iris" + ) + } +) testthat::test_that("teal_data returns teal_data when data passed as named list", {