Skip to content

Commit

Permalink
Add topological_sort() to datanames(teal_data()) and also extend …
Browse files Browse the repository at this point in the history
…`datanames()` with parent dataset when it is provided in `join_keys` (#319)

Part of insightsengineering/teal#1253

This PR introduced below changes
- `teal_data()` constructor does not put `names(join_keys)` in default
`datanames(teal_data())` to maintain consistency with other features,
where we do not allow `datanames()` to contain names of objects that do
not exist in `@env`
- `datanames()` now sorts names topologically based on provided
`join_keys()`
- each time `datanames()` and `join_keys()` is overwritten the sort is
applied to `teal_data()@datanames`
- provided few more tests
- adjusted 2 tests that assumed extraction of `datanames()` from
`join_keys` is fine - but it's not right now as we do not allow
`datanames()` to contain names of objects that do not exists in `@env`

Will provide testing in this PR in teal as well
https://github.com/insightsengineering/teal/pull/1280/files

---------

Signed-off-by: Marcin <[email protected]>
Co-authored-by: André Veríssimo <[email protected]>
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: go_gonzo <[email protected]>
  • Loading branch information
5 people authored Aug 12, 2024
1 parent b8f4522 commit d240c22
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 15 deletions.
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# teal.data 0.6.0.9006

### Enhancements

- `datanames()`
- if `join_keys` are provided, the `datanames()` are now sorted in topological way (`Kahn` algorithm),
which means the parent dataset always precedes the child dataset.
- are extended by the parent dataset name, if one of the child dataset exist in `datanames()` and
the connection between child-parent is set through `join_keys` and `parent` exist in `teal_data` environment.
- do not allow to set a dataset name that do not exist in `teal_data` environment.
- `teal_data` no longer set default `datanames()` based on `join_keys` names - it uses only data names.

# teal.data 0.6.0

### Enhancements
Expand Down
1 change: 1 addition & 0 deletions R/join_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ join_keys.teal_data <- function(...) {
#' join_keys(td)
`join_keys<-.teal_data` <- function(x, value) {
join_keys(x@join_keys) <- value
datanames(x) <- x@datanames # datanames fun manages some exceptions
x
}

Expand Down
4 changes: 3 additions & 1 deletion R/teal_data-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ setClass(
new_teal_data <- function(data,
code = character(0),
join_keys = join_keys(),
datanames = union(names(data), names(join_keys))) {
datanames = names(data)) {
checkmate::assert_list(data)
checkmate::assert_class(join_keys, "join_keys")
if (is.null(datanames)) datanames <- character(0) # todo: allow to specify
Expand All @@ -82,6 +82,8 @@ new_teal_data <- function(data,
new_env <- rlang::env_clone(list2env(data), parent = parent.env(.GlobalEnv))
lockEnvironment(new_env, bindings = TRUE)

datanames <- .get_sorted_datanames(datanames = datanames, join_keys = join_keys, env = new_env)

methods::new(
"teal_data",
env = new_env,
Expand Down
18 changes: 17 additions & 1 deletion R/teal_data-datanames.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,27 @@ setMethod("datanames", signature = "qenv.error", definition = function(x) {
setGeneric("datanames<-", function(x, value) standardGeneric("datanames<-"))
setMethod("datanames<-", signature = c("teal_data", "character"), definition = function(x, value) {
checkmate::assert_subset(value, names(x@env))
x@datanames <- value
x@datanames <- .get_sorted_datanames(datanames = value, join_keys = x@join_keys, env = x@env)
methods::validObject(x)
x
})
setMethod("datanames<-", signature = c("qenv.error", "character"), definition = function(x, value) {
methods::validObject(x)
x
})


#' @keywords internal
.get_sorted_datanames <- function(datanames, join_keys, env) {
child_parent <- sapply(
datanames,
function(name) parent(join_keys, name),
USE.NAMES = TRUE,
simplify = FALSE
)

union(
intersect(unlist(topological_sort(child_parent)), ls(env)),
datanames
)
}
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ CDISC
Forkers
Getter
Hoffmann
Kahn
ORCID
Reproducibility
formatters
Expand Down
2 changes: 1 addition & 1 deletion man/new_teal_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 94 additions & 0 deletions tests/testthat/test-datanames.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,97 @@ testthat::test_that("datanames<- called on qenv.error does not change qenv.error
qec <- qe
testthat::expect_identical(qe, qec)
})

# topological_order ----

testthat::test_that("datanames are set in topological order in constructor if join_keys are specified", {
data <-
teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id")))
testthat::expect_identical(
datanames(data),
c("a", "b")
)
})

testthat::test_that("datanames return parent if in constructor it was provided in join_keys and exists in env", {
data <-
teal_data(b = data.frame(), a = data.frame(), join_keys = join_keys(join_key("a", "b", "id")))
datanames(data) <- "b"
testthat::expect_identical(
datanames(data),
c("a", "b")
)
})

testthat::test_that(
"datanames do not return parent if in constructor it was provided in join_keys but do not exists in env",
{
data <-
teal_data(b = data.frame(), join_keys = join_keys(join_key("a", "b", "id")))
testthat::expect_identical(
datanames(data),
"b"
)
}
)

testthat::test_that("datanames return topological order of datasets once join_keys are specified", {
data <- within(teal_data(), {
ADTTE <- teal.data::rADTTE # nolint: object_name.
iris <- iris
ADSL <- teal.data::rADSL # nolint: object_name.
})
datanames(data) <- c("ADTTE", "iris", "ADSL")
join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")]
testthat::expect_identical(
datanames(data),
c("ADSL", "ADTTE", "iris")
)
})

testthat::test_that("datanames return topological order of datasets after datanames are called after join_keys", {
data <- within(teal_data(), {
ADTTE <- teal.data::rADTTE # nolint: object_name.
iris <- iris
ADSL <- teal.data::rADSL # nolint: object_name.
})

join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")]
datanames(data) <- c("ADTTE", "iris", "ADSL")

testthat::expect_identical(
datanames(data),
c("ADSL", "ADTTE", "iris")
)
})


testthat::test_that("datanames return parent if join_keys were provided and parent exists in env", {
data <- within(teal_data(), {
ADTTE <- teal.data::rADTTE # nolint: object_name.
iris <- iris
ADSL <- teal.data::rADSL # nolint: object_name.
})

join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")]
datanames(data) <- c("ADTTE", "iris")

testthat::expect_identical(
datanames(data),
c("ADSL", "ADTTE", "iris")
)
})

testthat::test_that("datanames do not return parent if join_keys were provided and parent did not exists in env", {
data <- teal_data(
ADTTE = teal.data::rADTTE, # nolint: object_name.
iris = iris
)

join_keys(data) <- default_cdisc_join_keys[c("ADSL", "ADTTE")]

testthat::expect_identical(
datanames(data),
c("ADTTE", "iris")
)
})
30 changes: 18 additions & 12 deletions tests/testthat/test-teal_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,25 @@ testthat::test_that("teal_data initializes teal_data object with @datanames take
)
})

testthat::test_that("teal_data initializes teal_data object with @datanames taken from passed join_keys", {
testthat::expect_identical(
teal_data(join_keys = join_keys(join_key("parent", "child", "id")))@datanames,
c("parent", "child")
)
})
testthat::test_that(
"teal_data initializes teal_data object without @datanames taken from join_keys if objects did not exist in env",
{
testthat::expect_identical(
datanames(teal_data(join_keys = join_keys(join_key("parent", "child", "id")))),
character(0)
)
}
)

testthat::test_that("teal_data initializes teal_data object with @datanames taken from join_keys and passed objects", {
testthat::expect_identical(
teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))@datanames,
c("iris", "parent", "child")
)
})
testthat::test_that(
"teal_data initializes teal_data object with @datanames taken only from passed objects and not join_keys",
{
testthat::expect_identical(
datanames(teal_data(iris = iris, join_keys = join_keys(join_key("parent", "child", "id")))),
"iris"
)
}
)


testthat::test_that("teal_data returns teal_data when data passed as named list", {
Expand Down

0 comments on commit d240c22

Please sign in to comment.