From 497435d5a6aa4852e4b02f0a00da7c870d683690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 8 Dec 2023 17:23:42 +0100 Subject: [PATCH 01/27] feat: initial support --- R/join_key.R | 16 ++++++++-- R/join_keys-c.R | 4 +-- R/join_keys-extract.R | 23 ++++++++++---- R/join_keys-parents.R | 2 +- R/join_keys-utils.R | 2 +- R/join_keys.R | 8 ++--- man/join_key.Rd | 2 +- man/join_keys.Rd | 16 +++++----- tests/testthat/test-join_keys-extract.R | 42 +++++++++++++++---------- tests/testthat/test-join_keys-parents.R | 20 ++++++------ tests/testthat/test-join_keys-print.R | 34 ++++++++++---------- tests/testthat/test-join_keys.R | 11 +++---- 12 files changed, 104 insertions(+), 76 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index 9d134c366..b027ad24c 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -25,7 +25,8 @@ #' join_key("d1", "d2", c("A")) #' join_key("d1", "d2", c("A" = "B")) #' join_key("d1", "d2", c("A" = "B", "C")) -join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { +join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = "dataset_1") { + checkmate::assert_choice(parent, choices = c("dataset_1", "dataset_2", "none")) checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) @@ -65,6 +66,16 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { keys <- NULL } + if (dataset_1 == dataset_2) { + parent <- "none" + } + + new_parents <- switch(parent, + dataset_1 = structure(list(dataset_1), names = dataset_2), + dataset_2 = structure(list(dataset_2), names = dataset_1), + list() + ) + structure( list( structure( @@ -73,6 +84,7 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys) { ) ), names = dataset_1, - class = "join_key_set" + class = "join_key_set", + parents = new_parents ) } diff --git a/R/join_keys-c.R b/R/join_keys-c.R index 576d028b9..d1f515b62 100644 --- a/R/join_keys-c.R +++ b/R/join_keys-c.R @@ -10,7 +10,7 @@ #' jk, #' join_keys( #' join_key("ds4", keys = c("pk4", "pk4_2")), -#' join_key("ds4", "ds3", c(pk4_2 = "pk3")) +#' join_key("ds3", "ds4", c(pk3 = "pk4_2")) #' ) #' ) c.join_keys <- function(...) { @@ -50,7 +50,7 @@ c.join_keys <- function(...) { #' jk_merged <- c( #' jk_merged, #' join_key("ds5", keys = "pk5"), -#' join_key("ds5", "ds1", c(pk5 = "pk1")) +#' join_key("ds1", "ds5", c(pk1 = "pk5")) #' ) c.join_key_set <- function(...) { c.join_keys(...) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 9b0d1706f..2255f712d 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -134,12 +134,13 @@ #' #' # Setting a single relationship pair --- #' -#' jk["ds4", "ds1"] <- c("pk4" = "pk1") +#' jk["ds1", "ds4"] <- c("pk1" = "pk4") #' #' # Removing a key --- #' #' jk["ds5", "ds5"] <- NULL -`[<-.join_keys` <- function(x, i, j, value) { +`[<-.join_keys` <- function(x, i, j, value, parent = "i") { + checkmate::assert_choice(parent, choices = c("i", "j", "none")) if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { @@ -163,8 +164,13 @@ ) } - x[[i]][[j]] <- value - x + parent_conversion <- switch(parent, + i = "dataset_1", + j = "dataset_2", + "none" + ) + + c(x, join_key(i, j, value, parent_conversion)) } #' @noRd @@ -234,9 +240,14 @@ # Remove classes to use list-based get/assign operations new_x <- unclass(x) - # In case a pair is removed, also remove the symmetric pair + # In case a pair is removed, also remove the symmetric pair and update parents removed_names <- setdiff(names(new_x[[i]]), names(norm_value)) - for (.x in removed_names) new_x[[.x]][[i]] <- NULL + for (.x in removed_names) { + if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL + if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL + + new_x[[.x]][[i]] <- NULL + } new_x[[i]] <- norm_value diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R index c48376e32..7a89334c2 100644 --- a/R/join_keys-parents.R +++ b/R/join_keys-parents.R @@ -73,7 +73,7 @@ parents.teal_data <- function(x) { #' parents(jk)["ds6"] <- "ds5" #' parents(jk)["ds7"] <- "ds6" `parents<-.join_keys` <- function(x, value) { - checkmate::assert_list(value, types = "character", names = "named") + checkmate::assert_list(value, types = c("character"), names = "named") new_parents <- list() diff --git a/R/join_keys-utils.R b/R/join_keys-utils.R index c5289864e..005ae3bc9 100644 --- a/R/join_keys-utils.R +++ b/R/join_keys-utils.R @@ -115,7 +115,7 @@ update_keys_given_parents <- function(x) { d1_pk <- jk[[d1]][[d1]] d1_parent <- parent(jk, d1) for (d2 in datanames) { - if (paste(d2, d1) %in% duplicate_pairs) { + if (identical(d2, d1) || paste(d2, d1) %in% duplicate_pairs) { next } if (length(jk[[d1]][[d2]]) == 0) { diff --git a/R/join_keys.R b/R/join_keys.R index 860b97345..046491cce 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -41,8 +41,8 @@ #' join_key("ds1", "ds1", "pk1"), #' join_key("ds2", "ds2", "pk2"), #' join_key("ds3", "ds3", "pk3"), -#' join_key("ds2", "ds1", c(pk2 = "pk1")), -#' join_key("ds3", "ds1", c(pk3 = "pk1")) +#' join_key("ds1", "ds2", c(pk1 = "pk2")), +#' join_key("ds1", "ds3", c(pk1 = "pk3")) #' ) #' #' jk @@ -114,8 +114,8 @@ join_keys.teal_data <- function(...) { #' join_keys(obj)["ds1", "ds1"] <- "pk1" #' join_keys(obj)["ds2", "ds2"] <- "pk2" #' join_keys(obj)["ds3", "ds3"] <- "pk3" -#' join_keys(obj)["ds2", "ds1"] <- c(pk2 = "pk1") -#' join_keys(obj)["ds3", "ds1"] <- c(pk3 = "pk1") +#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2") +#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3") #' #' identical(jk, join_keys(obj)) `join_keys<-.join_keys` <- function(x, value) { diff --git a/man/join_key.Rd b/man/join_key.Rd index f31fe1f0f..2be02c117 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -4,7 +4,7 @@ \alias{join_key} \title{Create a relationship between a pair of datasets} \usage{ -join_key(dataset_1, dataset_2 = dataset_1, keys) +join_key(dataset_1, dataset_2 = dataset_1, keys, parent = "dataset_1") } \arguments{ \item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 2fcc03a5f..9fb564ebd 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -28,7 +28,7 @@ join_keys(...) \method{[}{join_keys}(x, i, j) -\method{[}{join_keys}(x, i, j) <- value +\method{[}{join_keys}(x, i, j, parent = "i") <- value \method{c}{join_keys}(...) @@ -115,8 +115,8 @@ jk <- join_keys( join_key("ds1", "ds1", "pk1"), join_key("ds2", "ds2", "pk2"), join_key("ds3", "ds3", "pk3"), - join_key("ds2", "ds1", c(pk2 = "pk1")), - join_key("ds3", "ds1", c(pk3 = "pk1")) + join_key("ds1", "ds2", c(pk1 = "pk2")), + join_key("ds1", "ds3", c(pk1 = "pk3")) ) jk @@ -138,7 +138,7 @@ jk["ds5", "ds5"] <- "pk5" # Setting a single relationship pair --- -jk["ds4", "ds1"] <- c("pk4" = "pk1") +jk["ds1", "ds4"] <- c("pk1" = "pk4") # Removing a key --- @@ -150,7 +150,7 @@ jk_merged <- c( jk, join_keys( join_key("ds4", keys = c("pk4", "pk4_2")), - join_key("ds4", "ds3", c(pk4_2 = "pk3")) + join_key("ds3", "ds4", c(pk3 = "pk4_2")) ) ) @@ -159,7 +159,7 @@ jk_merged <- c( jk_merged <- c( jk_merged, join_key("ds5", keys = "pk5"), - join_key("ds5", "ds1", c(pk5 = "pk1")) + join_key("ds1", "ds5", c(pk1 = "pk5")) ) # Assigning keys via join_keys(x)[i, j] <- value ---- @@ -171,8 +171,8 @@ obj <- teal_data() join_keys(obj)["ds1", "ds1"] <- "pk1" join_keys(obj)["ds2", "ds2"] <- "pk2" join_keys(obj)["ds3", "ds3"] <- "pk3" -join_keys(obj)["ds2", "ds1"] <- c(pk2 = "pk1") -join_keys(obj)["ds3", "ds1"] <- c(pk3 = "pk1") +join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2") +join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3") identical(jk, join_keys(obj)) diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R index 10d3ce850..4e81540f4 100644 --- a/tests/testthat/test-join_keys-extract.R +++ b/tests/testthat/test-join_keys-extract.R @@ -99,11 +99,14 @@ testthat::test_that("join_keys[i] ignores duplicate indexes - return only first jk <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d3", "d2", "b") + join_key("d2", "d3", "b") ) testthat::expect_equal( jk[c("d1", "d2", "d1")], - join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) + join_keys( + join_key("d1", "d1", "a"), + join_key("d2", "d2", "b") + ) ) }) @@ -313,15 +316,22 @@ testthat::test_that("[[<-.join_keys assigning NULL drops a key", { testthat::expect_null(jk[["d1"]]) }) -testthat::test_that("[[<-.join_keys adds symmetrical change to the foreign dataset", { +testthat::test_that("[[<-.join_keys adds symmetrical change without parents to the foreign dataset", { jk <- join_keys() jk[["d1"]][["d2"]] <- c("A" = "B", "C" = "C") testthat::expect_equal( jk, - join_keys( - join_key("d1", "d2", c("A" = "B", "C" = "C")), - join_key("d2", "d1", c("B" = "A", "C" = "C")) + structure( + list( + d1 = list( + d2 = c(c("A" = "B", "C" = "C")) + ), + d2 = list( + d1 = c("B" = "A", "C" = "C") + ) + ), + class = c("join_keys", "list") ) ) }) @@ -339,7 +349,7 @@ testthat::test_that("[[<- mutating non-existing keys adds them", { my_keys, join_keys( join_key("d1", "d2", "A"), - join_key("d2", "d3", "B") + join_key("d2", "d3", "B", parent = "none") # [[<- doesn't set parent ) ) }) @@ -373,21 +383,19 @@ testthat::test_that("[[<-.join_keys removes keys with NULL", { ) }) -testthat::test_that("[[<-.join_keys removes keys with NULL and applies summetrical changes", { +testthat::test_that("[[<-.join_keys removes keys with NULL and applies symmetrical changes", { my_keys <- join_keys( join_key("d1", "d2", "A"), - join_key("d2", "d1", "A"), - join_key("d2", "d3", "B"), - join_key("d3", "d2", "B") + join_key("d2", "d3", "B") ) my_keys[["d1"]][["d2"]] <- NULL - testthat::expect_identical( + expect_null(my_keys["d1", "d2"]) + expect_null(my_keys["d2", "d1"]) + + expect_equal( my_keys, - join_keys( - join_key("d2", "d3", "B"), - join_key("d3", "d2", "B") - ) + join_keys(join_key("d2", "d3", "B")) ) }) @@ -422,5 +430,5 @@ testthat::test_that("[[<-.join_keys fails when provided foreign key pairs for sa testthat::test_that("[[<-.join_keys allows when provided foreign key pairs for same datasets and same keys", { jk <- join_keys() testthat::expect_silent(jk[["ds1"]] <- list(ds2 = "new", ds2 = c("new" = "new"))) - testthat::expect_equal(jk, join_keys(join_key("ds1", "ds2", "new"))) + testthat::expect_equal(jk, join_keys(join_key("ds1", "ds2", "new", parent = "none"))) }) diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R index 8c8a44502..d01e27630 100644 --- a/tests/testthat/test-join_keys-parents.R +++ b/tests/testthat/test-join_keys-parents.R @@ -19,8 +19,8 @@ testthat::test_that("parents<- accepts a named list containing (non-empty, non-m testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { jk <- join_keys( - join_key("a", "b", "ab"), - join_key("c", "d", "cd") + join_key("a", "b", "ab", parent = "none"), + join_key("c", "d", "cd", parent = "none") ) parents(jk)[["a"]] <- "b" parents(jk)[["c"]] <- "d" @@ -29,16 +29,16 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct testthat::test_that("parents<- dataset can't be own parent", { jk <- join_keys( - join_key("a", "b", "ab"), - join_key("c", "d", "cd") + join_key("a", "b", "ab", parent = "none"), + join_key("c", "d", "cd", parent = "none") ) testthat::expect_error(parents(jk) <- list(a = "a")) }) testthat::test_that("parents<- setting parent-child relationship fails when no foreign keys between datasets", { jk <- join_keys( - join_key("a", "1", "aa"), - join_key("b", "b", "bb") + join_key("a", "1", "aa", parent = "none"), + join_key("b", "b", "bb", parent = "none") ) testthat::expect_error(parents(jk) <- list(a = "b")) }) @@ -57,8 +57,8 @@ testthat::test_that("parents<- ensures it is a directed acyclical graph (DAG)", testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { jk <- join_keys( - join_key("a", "b", "ab"), - join_key("c", "d", "cd") + join_key("a", "b", "ab", parent = "none"), + join_key("c", "d", "cd", parent = "none") ) parents(jk)[["a"]] <- "b" parents(jk)[["c"]] <- "d" @@ -67,7 +67,7 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct }) testthat::test_that("parents<- fails when value isn't a list (non-empty, non-missing) character", { - jk <- join_keys(join_key("a", "b", "test")) + jk <- join_keys(join_key("a", "b", "test", parent = "none")) testthat::expect_error(parents(jk) <- list(b = 1)) testthat::expect_error(parents(jk) <- list(b = NA_character_)) testthat::expect_error(parents(jk) <- list(b = NULL)) @@ -94,7 +94,7 @@ testthat::test_that("parents<- sets parent datasets to join_keys kept in teal_da testthat::test_that("parents<- setting parents changes join_keys object", { jk <- join_keys(join_key("a", "b", "ab")) jk2 <- jk - parents <- list(b = "a") + parents <- list(a = "b") parents(jk) <- parents testthat::expect_failure(testthat::expect_identical(jk, jk2)) diff --git a/tests/testthat/test-join_keys-print.R b/tests/testthat/test-join_keys-print.R index dd3c51e95..c4bb76a39 100644 --- a/tests/testthat/test-join_keys-print.R +++ b/tests/testthat/test-join_keys-print.R @@ -5,18 +5,18 @@ testthat::test_that("format.join_keys for empty set", { testthat::test_that("format.join_keys with empty parents", { my_keys <- join_keys( - join_key("d1", "d1", "a"), - join_key("d2", "d2", "b"), - join_key("d3", "d3", "c"), - join_key("d2", "d1", "ba"), - join_key("d3", "d2", "ca") + join_key("d1", "d1", "a", parent = "none"), + join_key("d2", "d2", "b", parent = "none"), + join_key("d3", "d3", "c", parent = "none"), + join_key("d1", "d2", "ab", parent = "none"), + join_key("d2", "d3", "ac", parent = "none") ) testthat::expect_identical( format(my_keys), paste( "A join_keys object containing foreign keys between 3 datasets:", - "d1: [a]", " <-> d2: [ba]", "d2: [b]", " <-> d1: [ba]", " <-> d3: [ca]", - "d3: [c]", " <-> d2: [ca]", + "d1: [a]", " <-> d2: [ab]", "d2: [b]", " <-> d1: [ab]", " <-> d3: [ac]", + "d3: [c]", " <-> d2: [ac]", sep = "\n" ) ) @@ -27,16 +27,16 @@ testthat::test_that("format.join_keys for parents", { join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), - join_key("d2", "d1", "ba"), - join_key("d3", "d2", "ca") + join_key("d1", "d2", "ab"), + join_key("d2", "d3", "ac") ) - parents(my_keys) <- list("d2" = "d1", "d3" = "d2") + testthat::expect_identical( format(my_keys), paste( "A join_keys object containing foreign keys between 3 datasets:", - "d1: [a]", " <-- d2: [ba]", "d2: [b]", " --> d1: [ba]", " <-- d3: [ca]", - "d3: [c]", " --> d2: [ca]", + "d1: [a]", " <-- d2: [ab]", "d2: [b]", " --> d1: [ab]", " <-- d3: [ac]", + "d3: [c]", " --> d2: [ac]", sep = "\n" ) ) @@ -47,10 +47,10 @@ testthat::test_that("format.join_keys print inferred keys for children sharing p join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), - join_key("d2", "d1", "child-a"), - join_key("d3", "d1", "child-a") + join_key("d1", "d2", "child-a"), + join_key("d1", "d3", "child-a") ) - parents(my_keys) <- list("d2" = "d1", "d3" = "d1") + testthat::expect_identical( format(my_keys), paste( @@ -68,8 +68,8 @@ testthat::test_that("print.join_keys produces output same as format", { join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), - join_key("d2", "d1", "ba"), - join_key("d3", "d2", "ca") + join_key("d1", "d2", "ab"), + join_key("d2", "d3", "ac") ) testthat::expect_output(print(my_keys), format(my_keys), fixed = TRUE) }) diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R index 6b6b280f7..4dac79670 100644 --- a/tests/testthat/test-join_keys.R +++ b/tests/testthat/test-join_keys.R @@ -98,26 +98,23 @@ testthat::test_that("join_keys fails when provided foreign key pairs have incomp testthat::test_that("join_keys constructor adds symmetric keys on given (unnamed) foreign key", { my_keys <- join_keys(join_key("d1", "d2", "a")) - testthat::expect_identical( + testthat::expect_equal( my_keys, - join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "a")) + join_keys(join_key("d2", "d1", "a", parent = "dataset_2")) ) }) testthat::test_that("join_keys constructor adds symmetric keys on given (named) foreign key", { - testthat::expect_identical( + testthat::expect_equal( join_keys( join_key("d1", "d2", c(a = "b")) ), join_keys( - join_key("d1", "d2", c(a = "b")), - join_key("d2", "d1", c(b = "a")) + join_key("d2", "d1", c(b = "a"), parent = "dataset_2") ) ) }) - - # join_keys.<- ---------------------------------------------------------------- testthat::test_that("join_keys<-.join_keys overwrites existing join_keys", { my_keys <- join_keys(join_key("d1", "d1", "a"), join_key("d2", "d2", "b")) From 1b3f662c49d4f6861da04f9911edcb3ef7604bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:01:29 +0100 Subject: [PATCH 02/27] feat: use match.arg and keep checkmate error --- R/join_key.R | 6 +++++- R/join_keys-extract.R | 7 ++++++- inst/WORDLIST | 7 ++++--- man/join_key.Rd | 11 ++++++++++- man/join_keys.Rd | 6 +++++- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index b027ad24c..f31c9da4c 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -14,6 +14,9 @@ #' #' If any element of the `keys` vector is empty with a non-empty name, then the name is #' used for both datasets. +#' @param parent (`character(1)`) indicates which dataset is the parent in the +#' relationship or `none` if it is an undirected relationship. One of `dataset_1`, +#' `dataset_2` or `none`. #' #' @return object of class `join_key_set` to be passed into `join_keys` function. #' @@ -25,7 +28,8 @@ #' join_key("d1", "d2", c("A")) #' join_key("d1", "d2", c("A" = "B")) #' join_key("d1", "d2", c("A" = "B", "C")) -join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = "dataset_1") { +join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset_1", "dataset_2", "none")) { + parent <- tryCatch(match.arg(parent), error = function(err) parent) checkmate::assert_choice(parent, choices = c("dataset_1", "dataset_2", "none")) checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 2255f712d..c1e3c8a6d 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -117,6 +117,10 @@ #' @rdname join_keys #' @order 2 #' +#' @param parent (`character(1)`) indicates which dataset is the parent in the +#' relationship or `none` if it is an undirected relationship. One of `i`, +#' `j` or `none`. +#' #' @section Functions: #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. #' - `x[i] <- value`: This (without `j` parameter) **is not** a supported @@ -139,7 +143,8 @@ #' # Removing a key --- #' #' jk["ds5", "ds5"] <- NULL -`[<-.join_keys` <- function(x, i, j, value, parent = "i") { +`[<-.join_keys` <- function(x, i, j, value, parent = c("i", "j", "none")) { + parent <- tryCatch(match.arg(parent), error = function(err) parent) checkmate::assert_choice(parent, choices = c("i", "j", "none")) if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") diff --git a/inst/WORDLIST b/inst/WORDLIST index 59eaa668b..a30fc4043 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,10 +1,11 @@ Forkers -Getter -Hoffmann -Reproducibility formatters funder +Getter getter +Hoffmann pre repo +Reproducibility reproducibility +undirected diff --git a/man/join_key.Rd b/man/join_key.Rd index 2be02c117..a9aabc403 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -4,7 +4,12 @@ \alias{join_key} \title{Create a relationship between a pair of datasets} \usage{ -join_key(dataset_1, dataset_2 = dataset_1, keys, parent = "dataset_1") +join_key( + dataset_1, + dataset_2 = dataset_1, + keys, + parent = c("dataset_1", "dataset_2", "none") +) } \arguments{ \item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, @@ -17,6 +22,10 @@ If unnamed, the same column names are used for both datasets. If any element of the \code{keys} vector is empty with a non-empty name, then the name is used for both datasets.} + +\item{parent}{(\code{character(1)}) indicates which dataset is the parent in the +relationship or \code{none} if it is an undirected relationship. One of \code{dataset_1}, +\code{dataset_2} or \code{none}.} } \value{ object of class \code{join_key_set} to be passed into \code{join_keys} function. diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 9fb564ebd..a26db7c66 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -28,7 +28,7 @@ join_keys(...) \method{[}{join_keys}(x, i, j) -\method{[}{join_keys}(x, i, j, parent = "i") <- value +\method{[}{join_keys}(x, i, j, parent = c("i", "j", "none")) <- value \method{c}{join_keys}(...) @@ -59,6 +59,10 @@ a character vector, but it can also take numeric, logical, \code{NULL} or missin \item{value}{(\code{join_key_set} or list of \code{join_key_set}) relationship pairs to add to \code{join_keys} list.} + +\item{parent}{(\code{character(1)}) indicates which dataset is the parent in the +relationship or \code{none} if it is an undirected relationship. One of \code{i}, +\code{j} or \code{none}.} } \value{ \code{join_keys} object. From a78128406a746d6075a0368365133253a54cd3ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:16:17 +0100 Subject: [PATCH 03/27] fix: remove redundant line --- R/join_key.R | 3 +-- R/join_keys-extract.R | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index f31c9da4c..8386dd46e 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -29,8 +29,7 @@ #' join_key("d1", "d2", c("A" = "B")) #' join_key("d1", "d2", c("A" = "B", "C")) join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset_1", "dataset_2", "none")) { - parent <- tryCatch(match.arg(parent), error = function(err) parent) - checkmate::assert_choice(parent, choices = c("dataset_1", "dataset_2", "none")) + parent <- checkmate::matchArg(parent, choices = c("dataset_1", "dataset_2", "none"), .var.name = "parent") checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index c1e3c8a6d..666cf65df 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -144,8 +144,7 @@ #' #' jk["ds5", "ds5"] <- NULL `[<-.join_keys` <- function(x, i, j, value, parent = c("i", "j", "none")) { - parent <- tryCatch(match.arg(parent), error = function(err) parent) - checkmate::assert_choice(parent, choices = c("i", "j", "none")) + parent <- checkmate::matchArg(parent, choices = c("i", "j", "none"), .var.name = "parent") if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { From 9e5abc862d9e0ee19150da3fa83b6ae5120106f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:18:01 +0100 Subject: [PATCH 04/27] feat: protect against cyclical graphs on const./merge --- R/join_keys-c.R | 4 ++-- R/join_keys-parents.R | 4 ++++ tests/testthat/test-join_keys-c.R | 6 ++++++ tests/testthat/test-join_keys-parents.R | 14 +++++++------- tests/testthat/test-join_keys.R | 18 +++++++++++++++++- 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/R/join_keys-c.R b/R/join_keys-c.R index d1f515b62..b0cc7a9f5 100644 --- a/R/join_keys-c.R +++ b/R/join_keys-c.R @@ -28,13 +28,13 @@ c.join_keys <- function(...) { f = function(.x, .y) { assert_compatible_keys2(.x, .y) out <- utils::modifyList(.x, .y, keep.null = FALSE) - attr(out, "parents") <- .merge_parents(.x, .y) + parents(out) <- .merge_parents(.x, .y) out } ) out <- utils::modifyList(join_keys_obj, x_merged, keep.null = FALSE) - attr(out, "parents") <- .merge_parents(join_keys_obj, x_merged) + parents(out) <- .merge_parents(join_keys_obj, x_merged) out } diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R index 7a89334c2..796a47dbd 100644 --- a/R/join_keys-parents.R +++ b/R/join_keys-parents.R @@ -78,6 +78,10 @@ parents.teal_data <- function(x) { new_parents <- list() for (dataset in names(value)) { + if (checkmate::test_scalar_na(value[[dataset]])) { + checkmate::assert("May not contain `NA_character_`", .var.name = "value") + } + parent <- new_parents[[dataset]] checkmate::assert( checkmate::check_null(parent), diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R index 234a8fe69..c9390e56f 100644 --- a/tests/testthat/test-join_keys-c.R +++ b/tests/testthat/test-join_keys-c.R @@ -224,3 +224,9 @@ testthat::test_that("c.join_keys merges existing parents are overwritten", { testthat::expect_identical(c(jk1, jk2), expected) }) + +testthat::test_that("c.join_keys throws error when merge produces acyclical graph", { + jk1 <- join_keys(join_key("d1", "d2", "a")) + jk2 <- join_keys(join_key("d2", "d1", "a")) + expect_error(c(jk1, jk2), "Cycle detected in a parent and child dataset graph") +}) diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R index d01e27630..e19c58b99 100644 --- a/tests/testthat/test-join_keys-parents.R +++ b/tests/testthat/test-join_keys-parents.R @@ -45,9 +45,9 @@ testthat::test_that("parents<- setting parent-child relationship fails when no f testthat::test_that("parents<- ensures it is a directed acyclical graph (DAG)", { cyclic_jk <- join_keys( - join_key("a", "b", "id"), - join_key("b", "c", "id"), - join_key("c", "a", "id") + join_key("a", "b", "id", parent = "none"), + join_key("b", "c", "id", parent = "none"), + join_key("c", "a", "id", parent = "none") ) testthat::expect_error( parents(cyclic_jk) <- list(a = "b", b = "c", c = "a"), @@ -68,10 +68,10 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct testthat::test_that("parents<- fails when value isn't a list (non-empty, non-missing) character", { jk <- join_keys(join_key("a", "b", "test", parent = "none")) - testthat::expect_error(parents(jk) <- list(b = 1)) - testthat::expect_error(parents(jk) <- list(b = NA_character_)) - testthat::expect_error(parents(jk) <- list(b = NULL)) - testthat::expect_error(parents(jk) <- NULL) + testthat::expect_error(parents(jk) <- list(b = 1), "May only contain the following types") + testthat::expect_error(parents(jk) <- list(b = NA_character_), "May not contain") + testthat::expect_error(parents(jk) <- list(b = NULL), "May only contain the following types") + testthat::expect_error(parents(jk) <- NULL, "Must be of type 'list'") }) testthat::test_that("parents<- setting parents again overwrites previous state", { diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R index 4dac79670..1757b868b 100644 --- a/tests/testthat/test-join_keys.R +++ b/tests/testthat/test-join_keys.R @@ -48,6 +48,16 @@ testthat::test_that("join_keys is a collection of join_key, ie named list with n ) }) +testthat::test_that("join_keys cannot create acyclical graph", { + expect_error( + join_keys( + join_key("d1", "d2", "A"), + join_key("d2", "d1", "A") + ), + "Cycle detected in a parent and child dataset graph" + ) +}) + testthat::test_that("join_keys.teal_data returns join_keys object from teal_data", { obj <- teal_data(join_keys = join_keys(join_key("d1", "d1", "a"))) testthat::expect_identical(obj@join_keys, join_keys(obj)) @@ -60,7 +70,13 @@ testthat::test_that("join_keys.join_keys returns itself", { testthat::test_that("join_keys accepts duplicated join_key", { testthat::expect_no_error( - join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "a")) + join_keys(join_key("d1", "d2", "a"), join_key("d1", "d2", "a")) + ) +}) + +testthat::test_that("join_keys accepts duplicated join_key (undirected)", { + testthat::expect_no_error( + join_keys(join_key("d1", "d2", "a", parent = "none"), join_key("d1", "d2", "a", parent = "none")) ) }) From 8e5136d0b2a78ab0d7776afa34ad6e7847098e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:34:17 +0100 Subject: [PATCH 05/27] docs: initial pass on vignette --- R/join_keys-extract.R | 6 ++++++ vignettes/join-keys.Rmd | 25 ++++++++++--------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 666cf65df..4f867e5ad 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -168,6 +168,12 @@ ) } + # Handle join key removal separately + if (is.null(value)) { + x[[i]][[j]] <- NULL + return(x) + } + parent_conversion <- switch(parent, i = "dataset_1", j = "dataset_2", diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index b7f66f4f4..d5306fc49 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -49,23 +49,20 @@ As the constructor it is used to specify keys `join_keys(...)` as a collection o Note that join keys are assumed to be symmetric, i.e., `join_key("ds1", "ds2", c("ds1_col" = "ds2_col"))` establishes a relationship from "x" to "y" and vice versa. +By default the new joining key will set the dataset defined in the first index as the parent. + ```{r, results="hide", message=FALSE, tidy=FALSE} library(teal.data) jk <- join_keys( join_key("ds1", keys = "col_1"), # ds1: [col_1] join_key("ds2", keys = c("col_1", "col_2")), # ds2: [col_1, col_2] join_key("ds3", keys = c("col_1", "col_3")), # ds3: [col_1, col_3] - join_key("ds1", "ds2", keys = "col_1"), # ds1 <--> ds2 - join_key("ds1", "ds3", keys = "col_1"), # ds1 <--> ds3 - join_key("ds4", "ds5", keys = c("col_4" = "col_5")) # ds4 <--> ds5 + join_key("ds1", "ds2", keys = "col_1"), # ds1 <-- ds2 + join_key("ds1", "ds3", keys = "col_1"), # ds1 <-- ds3 + join_key("ds4", "ds5", keys = c("col_4" = "col_5"), parent = "none") # ds4 <--> ds5 ) -# The parent of ds2 and ds3 is ds1 -# converts relationship to child-parent -# ds1 <--> ds2 becomes ds1 <-- ds2 -# ds1 <--> ds3 becomes ds1 <-- ds3 -parents(jk) <- list(ds2 = "ds1", ds3 = "ds1") - +# The parent-child relationships are created automatically (unless 'parent' parameter is "none") jk ``` @@ -102,14 +99,13 @@ jk["ds1", "ds1"] jk["ds4", "ds5"] ``` -Note that there is a symmetry between `ds4` and `ds5` relationship: +Note that there is a symmetry in the keys between `ds4` and `ds5` relationship: ```{r} jk["ds5", "ds4"] jk["ds5", "ds4"] ``` - When only 1 argument is used this operator will return a `join_keys` object that is filtered accordingly. ```{r} @@ -120,14 +116,13 @@ jk jk["ds1"] ``` - Modifying or adding a key uses the same notation with the assignment operator `<-`. -A symmetric relationship will be created automatically. +A symmetric relationship will be created automatically, where the parent (by default) will be the dataset defined on the first index. Assigning `NULL` value will delete the relationship. ```{r} -# Adding a new ds5 <-> ds1 key -jk["ds5", "ds1"] <- "a_column" +# Adding a new ds5 <-- ds1 key +jk["ds1", "ds5"] <- "a_column" # Removing an existing key jk["ds4", "ds5"] <- NULL From 6c01c19df4fe9d2c87f5843110935a98291df5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:52:35 +0100 Subject: [PATCH 06/27] fix: correcting check warnings --- R/join_keys-extract.R | 2 +- R/join_keys.R | 7 +++++-- man/join_keys.Rd | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 4f867e5ad..3e948bab0 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -143,7 +143,7 @@ #' # Removing a key --- #' #' jk["ds5", "ds5"] <- NULL -`[<-.join_keys` <- function(x, i, j, value, parent = c("i", "j", "none")) { +`[<-.join_keys` <- function(x, i, j, parent = c("i", "j", "none"), value) { parent <- checkmate::matchArg(parent, choices = c("i", "j", "none"), .var.name = "parent") if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") diff --git a/R/join_keys.R b/R/join_keys.R index 046491cce..638f0148a 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -24,6 +24,11 @@ #' either `teal_data` or `join_keys` to extract `join_keys`, \cr #' or any number of `join_key_set` objects to create `join_keys`, \cr #' or nothing to create an empty `join_keys` +#' @param value (named/unnamed `character`) key. +#' +#' \[for `join_keys(x) <- value`\]: (`join_key_set` or list of `join_key_set`) relationship +#' pairs to add to `join_keys` list. +#' #' #' @return `join_keys` object. #' @@ -91,8 +96,6 @@ join_keys.teal_data <- function(...) { #' @param x (`join_keys`) empty object to set the new relationship pairs. #' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)` #' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`) -#' @param value (`join_key_set` or list of `join_key_set`) relationship pairs to add -#' to `join_keys` list. #' #' @export `join_keys<-` <- function(x, value) { diff --git a/man/join_keys.Rd b/man/join_keys.Rd index a26db7c66..5b2ad721f 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -57,12 +57,14 @@ or \code{join_keys(x) <- value} then it can also take a supported class (\code{t \item{i, j}{indices specifying elements to extract or replace. Index should be a a character vector, but it can also take numeric, logical, \code{NULL} or missing.} -\item{value}{(\code{join_key_set} or list of \code{join_key_set}) relationship pairs to add -to \code{join_keys} list.} - \item{parent}{(\code{character(1)}) indicates which dataset is the parent in the relationship or \code{none} if it is an undirected relationship. One of \code{i}, \code{j} or \code{none}.} + +\item{value}{(named/unnamed \code{character}) key. + +[for \code{join_keys(x) <- value}]: (\code{join_key_set} or list of \code{join_key_set}) relationship +pairs to add to \code{join_keys} list.} } \value{ \code{join_keys} object. From ee0a0735315b15f5c08f7f8f79f69db6d8834310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 12 Dec 2023 14:19:11 +0100 Subject: [PATCH 07/27] docs: reflecting new API --- R/join_key.R | 21 ++++++----- R/join_keys.R | 18 +++++---- man/get_join_keys.Rd | 2 +- man/join_key.Rd | 21 +++++------ man/join_keys.Rd | 24 +++++++----- vignettes/join-keys.Rmd | 82 ++++++++++++++++++++++------------------- 6 files changed, 90 insertions(+), 78 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index 8386dd46e..cdab26fd0 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -2,25 +2,26 @@ #' #' @description `r lifecycle::badge("stable")` #' -#' @details `join_key()` will create a relationship for the variables on a pair -#' of datasets. +#' @description `Create a relationship for the variables on a pair of datasets. +#' It is directed by default with `dataset_1` being the parent. #' -#' @param dataset_1,dataset_2 (`character(1)`) dataset names. If `dataset_2` is omitted, -#' a primary key for `dataset_1` is created. -#' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1` -#' corresponding to columns of `dataset_2` given by the elements of `keys`. +#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted, +#' a primary key for `dataset_1` is created. +#' @param keys (optionally named `character`) Column mapping between the datasets, +#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of +#' `dataset_2` given by the elements of `keys`. #' #' If unnamed, the same column names are used for both datasets. #' #' If any element of the `keys` vector is empty with a non-empty name, then the name is #' used for both datasets. -#' @param parent (`character(1)`) indicates which dataset is the parent in the -#' relationship or `none` if it is an undirected relationship. One of `dataset_1`, -#' `dataset_2` or `none`. +#' @param parent (`character(1)`) Indicates the parent dataset in a parent-child +#' relationship or `none` if it is an undirected relationship. +#' One of `dataset_1`, `dataset_2` or `none`. #' #' @return object of class `join_key_set` to be passed into `join_keys` function. #' -#' @seealso [join_keys()] +#' @seealso [join_keys()], [parents()] #' #' @export #' diff --git a/R/join_keys.R b/R/join_keys.R index 638f0148a..33fb99e54 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -6,13 +6,15 @@ #' join_keys(...) #' #' @description -#' `join_keys()` facilitates the creation and retrieval of relationships between datasets. -#' `join_keys` class extends a list and contains keys connecting pairs of datasets. Each element -#' of the list contains keys for specific dataset. Each dataset can have a relationship with -#' itself (primary key) and with other datasets. +#' Facilitates the creation and retrieval of relationships between datasets. +#' `join_keys` class extends a list and contains keys connecting pairs of datasets. +#' Each element of the list contains keys for specific dataset. +#' Each dataset can have a relationship with itself (primary key) and with other datasets. #' -#' Note that `join_keys` list is symmetrical, that is, when keys are set between `dat1` and `dat2` it -#' is automatically mirrored between `dat2` and `dat1`. +#' Note that `join_keys` list is symmetrical and assumes a default direction, that is: +#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent +#' in a parent-child relationship and the mapping is automatically mirrored between +#' `ds2` and `ds1`. #' #' @section Methods (by class): #' - `join_keys()`: Returns an empty `join_keys` object when called without arguments. @@ -24,9 +26,9 @@ #' either `teal_data` or `join_keys` to extract `join_keys`, \cr #' or any number of `join_key_set` objects to create `join_keys`, \cr #' or nothing to create an empty `join_keys` -#' @param value (named/unnamed `character`) key. +#' @param value For `x[i, j, parent = "i")] <- value` (named/unnamed `character`) Column mapping between datasets. #' -#' \[for `join_keys(x) <- value`\]: (`join_key_set` or list of `join_key_set`) relationship +#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship #' pairs to add to `join_keys` list. #' #' diff --git a/man/get_join_keys.Rd b/man/get_join_keys.Rd index 6f8c78594..70449feb1 100644 --- a/man/get_join_keys.Rd +++ b/man/get_join_keys.Rd @@ -12,7 +12,7 @@ get_join_keys(data, dataset_1, dataset_2 = NULL) <- value \arguments{ \item{data}{`` - object to extract the join keys} -\item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, +\item{dataset_1, dataset_2}{(\code{character(1)}) Dataset names. When \code{dataset_2} is omitted, a primary key for \code{dataset_1} is created.} \item{value}{value to assign} diff --git a/man/join_key.Rd b/man/join_key.Rd index a9aabc403..c3fc99f80 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -12,30 +12,29 @@ join_key( ) } \arguments{ -\item{dataset_1, dataset_2}{(\code{character(1)}) dataset names. If \code{dataset_2} is omitted, +\item{dataset_1, dataset_2}{(\code{character(1)}) Dataset names. When \code{dataset_2} is omitted, a primary key for \code{dataset_1} is created.} -\item{keys}{(optionally named \code{character}) where \code{names(keys)} are columns in \code{dataset_1} -corresponding to columns of \code{dataset_2} given by the elements of \code{keys}. +\item{keys}{(optionally named \code{character}) Column mapping between the datasets, +where \code{names(keys)} maps columns in \code{dataset_1} corresponding to columns of +\code{dataset_2} given by the elements of \code{keys}. If unnamed, the same column names are used for both datasets. If any element of the \code{keys} vector is empty with a non-empty name, then the name is used for both datasets.} -\item{parent}{(\code{character(1)}) indicates which dataset is the parent in the -relationship or \code{none} if it is an undirected relationship. One of \code{dataset_1}, -\code{dataset_2} or \code{none}.} +\item{parent}{(\code{character(1)}) Indicates the parent dataset in a parent-child +relationship or \code{none} if it is an undirected relationship. +One of \code{dataset_1}, \code{dataset_2} or \code{none}.} } \value{ object of class \code{join_key_set} to be passed into \code{join_keys} function. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -} -\details{ -\code{join_key()} will create a relationship for the variables on a pair -of datasets. + +\verb{Create a relationship for the variables on a pair of datasets. It is directed by default with }dataset_1` being the parent. } \examples{ join_key("d1", "d2", c("A")) @@ -43,5 +42,5 @@ join_key("d1", "d2", c("A" = "B")) join_key("d1", "d2", c("A" = "B", "C")) } \seealso{ -\code{\link[=join_keys]{join_keys()}} +\code{\link[=join_keys]{join_keys()}}, \code{\link[=parents]{parents()}} } diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 5b2ad721f..b43030a34 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -61,22 +61,26 @@ a character vector, but it can also take numeric, logical, \code{NULL} or missin relationship or \code{none} if it is an undirected relationship. One of \code{i}, \code{j} or \code{none}.} -\item{value}{(named/unnamed \code{character}) key. +\item{value}{For \verb{x[i, j, parent = "i")] <- value} (named/unnamed \code{character}) Column mapping between datasets. -[for \code{join_keys(x) <- value}]: (\code{join_key_set} or list of \code{join_key_set}) relationship -pairs to add to \code{join_keys} list.} +For \code{join_keys(x) <- value}: (\code{join_key_set} or list of \code{join_key_set}) relationship +pairs to add to \code{join_keys} list. + +[i, j, parent = "i")]: R:i,\%20j,\%20parent\%20=\%20\%22i\%22)} } \value{ \code{join_keys} object. } \description{ -\code{join_keys()} facilitates the creation and retrieval of relationships between datasets. -\code{join_keys} class extends a list and contains keys connecting pairs of datasets. Each element -of the list contains keys for specific dataset. Each dataset can have a relationship with -itself (primary key) and with other datasets. - -Note that \code{join_keys} list is symmetrical, that is, when keys are set between \code{dat1} and \code{dat2} it -is automatically mirrored between \code{dat2} and \code{dat1}. +Facilitates the creation and retrieval of relationships between datasets. +\code{join_keys} class extends a list and contains keys connecting pairs of datasets. +Each element of the list contains keys for specific dataset. +Each dataset can have a relationship with itself (primary key) and with other datasets. + +Note that \code{join_keys} list is symmetrical and assumes a default direction, that is: +when keys are set between \code{ds1} and \code{ds2}, it defines \code{ds1} as the parent +in a parent-child relationship and the mapping is automatically mirrored between +\code{ds2} and \code{ds1}. } \section{Methods (by class)}{ diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index d5306fc49..ed02664e2 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -14,42 +14,53 @@ vignette: > The `teal.data` package provides a way to define primary keys for a dataset and establish relationships with other datasets. -Each dataset can be characterized by: +Each dataset _joining keys_ can be characterized by: - Columns constituting the primary key -- Merge keys, analogous to `SQL` foreign keys +- Foreign/merge keys, analogous to `SQL` foreign keys -Typically, an application developer specifies these keys manually. However, for datasets following the `ADaM` standard, `teal.data` can automatically assign keys using the `default_cdisc_join_keys` object. +Typically, an application developer specifies these keys manually. +However, for datasets following the `ADaM` standard, `teal.data` can automatically assign keys using the `default_cdisc_join_keys` object. Refer to the section ["Joining Keys with `ADaM` Datasets"](#join-keys-in-adam-datasets) for details on using this object to select specific datasets. ##### Uses of _join_keys_ class in _teal_ applications -The primary function of `join_keys` in `teal` applications is to facilitate the seamless [merging of datasets](https://insightsengineering.github.io/teal.transform/latest-tag/articles/data-merge.html) using `teal.transform`. +The primary function of _joining keys_ in `teal` applications is to facilitate the seamless [merging of datasets](https://insightsengineering.github.io/teal.transform/latest-tag/articles/data-merge.html) using `teal.transform`. Additionally, it plays a role on the data filtering using the _[Filter Panel](https://insightsengineering.github.io/teal/main/articles/filter-panel.html)_ in a `teal` application. -The filters applied to a _(parent)_ dataset are also applied to their children. +The filters applied to a (parent) dataset are also applied to their children. ## Anatomy of Join Keys The `join_keys` object contains information about the foreign/primary keys of multiple datasets. -Each key is represented by a pair of datasets _(by name reference)_ and a named character vector that encodes the match column name between the two datasets. -In addition, a foreign key also contains a _parent-child_ attribute that is used in the "Filter Panel" as we mentioned above. +Each key is represented by a pair of datasets (by name reference) and a named character vector that encodes the column name mapping between the two datasets. +In addition, a foreign key may also contain a _parent-child_ attribute that is used in the "Filter Panel" as we mentioned above. + +A new joining key can be created as an empty object, or by defining an initial set of primary and foreign keys. -A new `join_keys` can be created as empty or by defining an initial set of primary and foreign keys. That initial object can be extended by adding/modifying/removing keys and by establishing parent-child relationships between datasets. -The `join_keys` function is used both as a constructor and as a getter. -As the getter it is used to retrieve the `join_keys` that is contained in other objects, such as `teal_data`. +##### `join_keys(...)`: Joining Keys Constructor / Getter / Setter + +Convenient function that is used both as the constructor and as the getter for joining keys. + +As the _Getter_ it is used to retrieve the joining keys that are contained in other objects, such as `teal_data`. -As the constructor it is used to specify keys `join_keys(...)` as a collection of multiple `join_key` entries: +As the _Constructor_ it is used to specify a collection of multiple individual keys (via `join_key` function). -- `join_key(dataset_1, dataset_2, key)`: specifies the relationship between two datasets: - - `dataset_1`, `dataset_2`: names of the datasets (if `dataset_2` is the same as `dataset_1`, it creates a primary key) - - `key` _(optional)_: named vector of column names +##### `join_key(dataset_1, dataset_2, key, parent)`: Individual Joining Key Constructor + +Specifies the relationship between two datasets. + +- `dataset_1`, `dataset_2`: names of the datasets (if `dataset_2` is the same as `dataset_1` or is omitted, it creates a primary key) +- `key` (optional): named vector of column names +- `parent` (optional): indicates which dataset (`"dataset_1"` or `"dataset_2"`) is the parent in a _parent-child_ relationship, or `"none"` for an undirected relationship. Note that join keys are assumed to be symmetric, i.e., `join_key("ds1", "ds2", c("ds1_col" = "ds2_col"))` establishes a relationship from "x" to "y" and vice versa. -By default the new joining key will set the dataset defined in the first index as the parent. +By default the new joining key will set the `dataset_1` as the parent. + +##### Example & Output ```{r, results="hide", message=FALSE, tidy=FALSE} library(teal.data) @@ -85,8 +96,8 @@ jk ## Accessing and Modifying keys -The _subset_ operator with 2 indices is used to retrieve the primary/foreign keys in a `join_keys`. -Both indices must be a string. +The _subset_ operator with 2 indices (`x[i, j]`) is used to retrieve the primary/foreign keys in joining keys. +Both indices must be a string denoting the dataset name. ```{r} # Using the jk object defined on "Anatomy of Join Keys" @@ -114,6 +125,9 @@ jk # Getting primary key of "ds1" jk["ds1"] + +# Getting keys of "ds1" and "ds2" +jk[c("ds1", "ds2")] ``` Modifying or adding a key uses the same notation with the assignment operator `<-`. @@ -130,14 +144,14 @@ jk["ds4", "ds5"] <- NULL ## Join Keys Relationships -There are 2 types of relationships encoded with `join_keys` described in the following sections. The _primary_ and _foreign_ keys are created explicitly using the `join_key` function. +There are 2 types of relationships encoded with joining keys that are described in the following sections. The _primary_ and _foreign_ keys are created explicitly using the constructor for individual keys (`join_key`). -Additionally, the `join_keys` object detects implicit relationships when two datasets share foreign keys to a parent dataset, but not between themselves. These implicit relationships are available just like another foreign key and can be used to merge datasets, despite not being defined by the user. +Additionally, the `join_keys` object infers implicit relationships when two datasets share foreign keys to a parent dataset, but not between themselves. These implicit relationships are available just like another foreign key and can be used to merge datasets, despite not being defined by the user. ### Primary Key with `teal_data` When using the `teal_data` function, the simplest method to define the join keys is to use the `join_keys` argument. -We can specify the column(s) of the dataset that _(together)_ uniquely identify rows in the dataset. +We can specify the column(s) of the dataset that (together) uniquely identify rows in the dataset. ```{r, include=FALSE} # nolint start: commented_code_linter. @@ -160,8 +174,6 @@ join_keys(td_pk) We can extend the previous example and define primary keys for multiple datasets: ```{r, message=FALSE} -library(teal.data) - td_pk <- within( td_pk, { @@ -183,8 +195,8 @@ join_keys(td_pk) ### Foreign Keys with `teal_data` -When passing multiple datasets to the `teal_data` function, dataset relationships are set using `join_keys` and `join_key` and these are used to merge datasets together within `teal` apps. -For users familiar with `SQL` database schema, these relationships are symmetric and not as strict as `SQL` foreign key relationships as `teal` does not validate whether the values inserted into foreign key columns are present in the parent table. +When passing multiple datasets to the `teal_data` function, dataset relationships are set using `join_keys` and `join_key` functions, which then can be used to merge datasets together within `teal` apps. +For users familiar with `SQL` database schema, these relationships are symmetric and not as strict as `SQL` foreign key relationships as `teal` does not validate whether the values defined as foreign key columns are present in the table. For example: @@ -207,22 +219,19 @@ join_keys(td_fk) <- join_keys( join_key("ds2", keys = c("V", "W")), join_key("ds3", keys = c("V")), # Foreign keys - join_key("ds2", "ds1", c("W" = "X")), - join_key("ds3", "ds2", c("V" = "V")) + join_key("ds1", "ds2", c("X" = "W")), + join_key("ds2", "ds3", c("V" = "V")) ) -# The parent of ds2 and ds3 is ds1 -parents(td_fk) <- list(ds2 = "ds1", ds3 = "ds2") - join_keys(td_fk) ``` ### Implicit Relationships -Two datasets that share common _foreign_ keys to the same _parent_ dataset have an implicit relationship between them that is modeled and accessible in `join_keys`. +Two datasets that share common _foreign_ keys to the same _parent_ dataset have an implicit relationship between them that is modeled and accessible in joining keys. -This is a special inferred relationship from existing `join_keys` that does not need to be explicitly defined and can be seamlessly accessible just as any other foreign key. -As any other foreign key it can be overwritten. +This is a special inferred relationship from existing foreign keys that does not need to be explicitly defined and can be seamlessly accessible just as any other foreign key. +As any other foreign key they can be overwritten. These implicit relationships can be used to merge 2 datasets together, just as if they were defined manually. @@ -247,14 +256,11 @@ join_keys(td) <- join_keys( join_key("ds3", keys = c("V")), join_key("ds4", keys = c("V")), # Foreign keys - join_key("ds2", "ds1", c("W" = "X")), - join_key("ds3", "ds2", c("V" = "V")), - join_key("ds4", "ds1", c("V" = "X")) + join_key("ds1", "ds2", c("X" = "W")), + join_key("ds2", "ds3", c("V" = "V")), + join_key("ds1", "ds4", c("X" = "B")) ) -# The parent of ds2 and ds3 is ds1 -parents(td) <- list(ds2 = "ds1", ds3 = "ds2", ds4 = "ds1") - join_keys(td) join_keys(td)["ds2", "ds4"] From b05cf02f4363ea73dc108b2057be2f1a6b3b3cfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 12 Dec 2023 14:33:32 +0100 Subject: [PATCH 08/27] docs: minor changes --- vignettes/join-keys.Rmd | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index ed02664e2..fbd57bfff 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -16,16 +16,16 @@ The `teal.data` package provides a way to define primary keys for a dataset and Each dataset _joining keys_ can be characterized by: -- Columns constituting the primary key -- Foreign/merge keys, analogous to `SQL` foreign keys +- Columns constituting the primary key; +- Foreign/merge keys, analogous to `SQL` foreign keys. Typically, an application developer specifies these keys manually. However, for datasets following the `ADaM` standard, `teal.data` can automatically assign keys using the `default_cdisc_join_keys` object. Refer to the section ["Joining Keys with `ADaM` Datasets"](#join-keys-in-adam-datasets) for details on using this object to select specific datasets. -##### Uses of _join_keys_ class in _teal_ applications +##### Uses of `join_keys` class in _teal_ applications -The primary function of _joining keys_ in `teal` applications is to facilitate the seamless [merging of datasets](https://insightsengineering.github.io/teal.transform/latest-tag/articles/data-merge.html) using `teal.transform`. +The primary function of the `join_keys` class in `teal` applications is to facilitate the seamless [merging of datasets](https://insightsengineering.github.io/teal.transform/latest-tag/articles/data-merge.html) using `teal.transform`. Additionally, it plays a role on the data filtering using the _[Filter Panel](https://insightsengineering.github.io/teal/main/articles/filter-panel.html)_ in a `teal` application. The filters applied to a (parent) dataset are also applied to their children. @@ -36,24 +36,24 @@ The `join_keys` object contains information about the foreign/primary keys of mu Each key is represented by a pair of datasets (by name reference) and a named character vector that encodes the column name mapping between the two datasets. In addition, a foreign key may also contain a _parent-child_ attribute that is used in the "Filter Panel" as we mentioned above. -A new joining key can be created as an empty object, or by defining an initial set of primary and foreign keys. +A new join keys can be created as an empty object, or by defining an initial set of primary and foreign keys. That initial object can be extended by adding/modifying/removing keys and by establishing parent-child relationships between datasets. -##### `join_keys(...)`: Joining Keys Constructor / Getter / Setter +##### `join_keys(...)`: Join Keys Constructor / Getter / Setter -Convenient function that is used both as the constructor and as the getter for joining keys. +Convenient function that is used both as the constructor and as the getter for _join_keys_ objects. -As the _Getter_ it is used to retrieve the joining keys that are contained in other objects, such as `teal_data`. +As the _Getter_ it is used to retrieve the _joining keys_ that are contained in other objects, such as a `teal_data` object. -As the _Constructor_ it is used to specify a collection of multiple individual keys (via `join_key` function). +As the _Constructor_ it is used to specify a collection of multiple individual keys (via `join_key` function described below). -##### `join_key(dataset_1, dataset_2, key, parent)`: Individual Joining Key Constructor +##### `join_key(dataset_1, dataset_2, key, parent)`: Single Join Key Constructor -Specifies the relationship between two datasets. +Specifies a primary key or a relationship between two datasets. -- `dataset_1`, `dataset_2`: names of the datasets (if `dataset_2` is the same as `dataset_1` or is omitted, it creates a primary key) -- `key` (optional): named vector of column names +- `dataset_1`, `dataset_2`: names of the datasets (if `dataset_2` is the same as `dataset_1` or is omitted, it creates a primary key); +- `key` (optional): named vector of column names; - `parent` (optional): indicates which dataset (`"dataset_1"` or `"dataset_2"`) is the parent in a _parent-child_ relationship, or `"none"` for an undirected relationship. Note that join keys are assumed to be symmetric, i.e., `join_key("ds1", "ds2", c("ds1_col" = "ds2_col"))` establishes a relationship from "x" to "y" and vice versa. @@ -96,7 +96,7 @@ jk ## Accessing and Modifying keys -The _subset_ operator with 2 indices (`x[i, j]`) is used to retrieve the primary/foreign keys in joining keys. +The _subset_ operator with 2 indices (`x[i, j]`) is used to retrieve the primary/foreign keys. Both indices must be a string denoting the dataset name. ```{r} @@ -144,7 +144,7 @@ jk["ds4", "ds5"] <- NULL ## Join Keys Relationships -There are 2 types of relationships encoded with joining keys that are described in the following sections. The _primary_ and _foreign_ keys are created explicitly using the constructor for individual keys (`join_key`). +There are 2 types of relationships encoded with _joining keys_ that are described in the following sections. The _primary_ and _foreign_ keys are created explicitly using the constructor for individual keys (`join_key`). Additionally, the `join_keys` object infers implicit relationships when two datasets share foreign keys to a parent dataset, but not between themselves. These implicit relationships are available just like another foreign key and can be used to merge datasets, despite not being defined by the user. @@ -228,7 +228,7 @@ join_keys(td_fk) ### Implicit Relationships -Two datasets that share common _foreign_ keys to the same _parent_ dataset have an implicit relationship between them that is modeled and accessible in joining keys. +Two datasets that share common _foreign_ keys to the same _parent_ dataset have an implicit relationship between them that is modeled and accessible in _joining keys_. This is a special inferred relationship from existing foreign keys that does not need to be explicitly defined and can be seamlessly accessible just as any other foreign key. As any other foreign key they can be overwritten. From c7f6c897785759ceeeb3c936105194e3f76f44a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 12 Dec 2023 14:48:14 +0100 Subject: [PATCH 09/27] tests: better support for default parent behavior --- tests/testthat/test-join_keys-c.R | 39 +++++++++++-------------- tests/testthat/test-join_keys-extract.R | 35 +++++++++------------- tests/testthat/test-join_keys-names.R | 15 +++++----- 3 files changed, 38 insertions(+), 51 deletions(-) diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R index c9390e56f..c3af719a9 100644 --- a/tests/testthat/test-join_keys-c.R +++ b/tests/testthat/test-join_keys-c.R @@ -118,17 +118,16 @@ testthat::test_that("c.join_key_set merges with empty and non-empty parents", { jk2 <- join_keys( join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d4", "d3", "cd") + join_key("d3", "d4", "cd") ) - parents(jk2) <- list(d3 = "d4") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d3", "d4", "cd") + join_key("d3", "d4", "cd", parent = "none") ) - parents(expected) <- list(d3 = "d4") + parents(expected) <- list(d4 = "d3") testthat::expect_identical( c(jk1, jk2), @@ -147,16 +146,15 @@ testthat::test_that("c.join_key_set merges parents also", { join_key("d2", "d2", "b"), join_key("d1", "d2", "ab") ) - parents(jk1) <- list(d1 = "d2") jk2 <- join_key("d3", "d3", "c") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d1", "d2", "ab"), + join_key("d1", "d2", "ab", parent = "none"), join_key("d3", "d3", "c") ) - parents(expected) <- list(d1 = "d2") + parents(expected) <- list(d2 = "d1") testthat::expect_equal( c(jk2, jk1), @@ -170,23 +168,22 @@ testthat::test_that("c.join_keys merges parents also", { join_key("d2", "d2", "b"), join_key("d1", "d2", "ab") ) - parents(jk1) <- list(d1 = "d2") + jk2 <- join_keys( join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d4", "d3", "cd") + join_key("d3", "d4", "cd") ) - parents(jk2) <- list(d3 = "d4") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d1", "d2", "ab"), + join_key("d1", "d2", "ab", parent = "none"), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d3", "d4", "cd") + join_key("d3", "d4", "cd", parent = "none") ) - parents(expected) <- list(d1 = "d2", d3 = "d4") + parents(expected) <- list(d2 = "d1", d4 = "d3") testthat::expect_identical( c(jk1, jk2), @@ -201,28 +198,26 @@ testthat::test_that("c.join_keys merges existing parents are overwritten", { join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), join_key("d1", "d2", "ab"), - join_key("d4", "d3", "cd") + join_key("d3", "d4", "cd") ) - parents(jk1) <- list(d1 = "d2", d3 = "d4") jk2 <- join_keys( join_key("d2", "d2", "b"), - join_key("d3", "d2", "cb") + join_key("d2", "d3", "cb") ) - parents(jk2) <- list(d3 = "d2") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d1", "d2", "ab"), - join_key("d4", "d3", "cd"), - join_key("d3", "d2", "cb") + join_key("d1", "d2", "ab", parent = "none"), + join_key("d3", "d4", "cd", parent = "none"), + join_key("d2", "d3", "cb", parent = "none") ) - parents(expected) <- list(d1 = "d2", d3 = "d2") + parents(expected) <- list(d2 = "d1", d3 = "d2", d4 = "d3") - testthat::expect_identical(c(jk1, jk2), expected) + testthat::expect_equal(c(jk1, jk2), expected) }) testthat::test_that("c.join_keys throws error when merge produces acyclical graph", { diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R index 4e81540f4..b7d57895e 100644 --- a/tests/testthat/test-join_keys-extract.R +++ b/tests/testthat/test-join_keys-extract.R @@ -55,17 +55,15 @@ testthat::test_that("join_keys[i] returns join_keys object for given dataset inc join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), - join_key("d2", "d1", "ab"), - join_key("d3", "d1", "ac") + join_key("d1", "d2", "ab"), + join_key("d1", "d3", "ac") ) - parents(my_keys) <- list("d2" = "d1", "d3" = "d1") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d2", "d1", "ab") + join_key("d1", "d2", "ab") ) - parents(expected) <- list("d2" = "d1") testthat::expect_equal(my_keys["d2"], expected) }) @@ -75,17 +73,15 @@ testthat::test_that("join_keys[i] returns join_keys object for given dataset and join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), - join_key("d2", "d1", "ab"), - join_key("d3", "d1", "ac") + join_key("d1", "d2", "ab"), + join_key("d1", "d3", "ac") ) - parents(my_keys) <- list("d2" = "d1", "d3" = "d1") expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d2", "d1", "ab") + join_key("d1", "d2", "ab") ) - parents(expected) <- list("d2" = "d1") testthat::expect_equal(my_keys["d2"], expected) }) @@ -163,10 +159,9 @@ testthat::test_that( join_key("a", "a", "aa"), join_key("b", "b", "bb"), join_key("c", "c", "cc"), - join_key("b", "a", c(child = "a1")), - join_key("c", "a", c(child = "a2")) + join_key("a", "b", c("a1" = "aa")), + join_key("a", "c", c("a2" = "aa")) ) - parents(my_keys) <- list("b" = "a", "c" = "a") testthat::expect_null(my_keys["b", "c"]) } ) @@ -178,12 +173,11 @@ testthat::test_that( join_key("a", "a", "aa"), join_key("b", "b", "bb"), join_key("c", "c", "cc"), - join_key("b", "a", "child-parent"), - join_key("c", "a", "child-parent"), - join_key("d", "b", "grandchild-child"), - join_key("e", "c", "grandchild-child") + join_key("a", "b", "child-parent"), + join_key("a", "c", "child-parent"), + join_key("b", "d", "grandchild-child"), + join_key("c", "e", "grandchild-child") ) - parents(my_keys) <- list("b" = "a", "c" = "a", "d" = "b", "e" = "c") testthat::expect_null(my_keys["d", "e"]) } ) @@ -195,10 +189,9 @@ testthat::test_that( join_key("a", "a", "aa"), join_key("b", "b", "bb"), join_key("c", "c", "cc"), - join_key("b", "a", c(bb = "aa")), - join_key("c", "a", c(cc = "aa")) + join_key("a", "b", c("aa" = "bb")), + join_key("a", "c", c("aa" = "cc")) ) - parents(my_keys) <- list("b" = "a", "c" = "a") # "bb" and "cc" are the names in child datasets, "aa" is the name in parent dataset testthat::expect_identical(my_keys["b", "c"], c(bb = "cc")) } diff --git a/tests/testthat/test-join_keys-names.R b/tests/testthat/test-join_keys-names.R index 53ca3cf07..40f4436ca 100644 --- a/tests/testthat/test-join_keys-names.R +++ b/tests/testthat/test-join_keys-names.R @@ -22,20 +22,19 @@ testthat::test_that("names<-.join_keys will replace names at all levels of the j testthat::test_that("names<-.join_keys will replace names at all levels of the join_keys list when parents set", { jk <- join_keys( join_key("a", "a", "a"), - join_key("b", "a", "ba"), - join_key("c", "a", "ca"), - join_key("d", "b", "db") + join_key("a", "b", "ba"), + join_key("a", "c", "ca"), + join_key("b", "d", "db") ) - parents(jk) <- list(b = "a", c = "a", d = "b") expected <- join_keys( join_key("a", "a", "a"), - join_key("B", "a", "ba"), - join_key("c", "a", "ca"), - join_key("d", "B", "db") + join_key("B", "a", "ba", parent = "none"), + join_key("c", "a", "ca", parent = "none"), + join_key("d", "B", "db", parent = "none") ) parents(expected) <- list(B = "a", c = "a", d = "B") names(jk)[2] <- "B" - testthat::expect_identical(jk, expected) + testthat::expect_equal(jk, expected) }) From 8a09b00d63f534502074864ade4030556d1bfcca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:13:21 +0100 Subject: [PATCH 10/27] lint: adds nolint (hidden) block in vignette --- vignettes/join-keys.Rmd | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index fbd57bfff..00cf4f264 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -62,6 +62,10 @@ By default the new joining key will set the `dataset_1` as the parent. ##### Example & Output +```{r, include=FALSE} +# nolint start: commented_code_linter. +``` + ```{r, results="hide", message=FALSE, tidy=FALSE} library(teal.data) jk <- join_keys( @@ -77,6 +81,10 @@ jk <- join_keys( jk ``` +```{r, include=FALSE} +# nolint end: commented_code_linter. +``` + | Output of `print(jk)` | Output annotation | | ---------------------------------- |:----------------------------------------:| | `## A join_keys object containing foreign keys between 3 datasets:` | **Title** | From a293fb0e457fd7a0086b1b386f1411bd99391a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:47:00 +0100 Subject: [PATCH 11/27] feat: remove unecessary iterations and adds coverage --- R/join_keys-utils.R | 51 +++++++++---------------- tests/testthat/test-join_keys-extract.R | 17 +++++++++ tests/testthat/test-join_keys-parents.R | 6 +++ 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/R/join_keys-utils.R b/R/join_keys-utils.R index 005ae3bc9..4939d99bc 100644 --- a/R/join_keys-utils.R +++ b/R/join_keys-utils.R @@ -111,44 +111,31 @@ update_keys_given_parents <- function(x) { datanames <- names(jk) duplicate_pairs <- list() - for (d1 in datanames) { + for (d1_ix in seq_along(datanames)) { + d1 <- datanames[[d1_ix]] d1_pk <- jk[[d1]][[d1]] d1_parent <- parent(jk, d1) - for (d2 in datanames) { - if (identical(d2, d1) || paste(d2, d1) %in% duplicate_pairs) { - next - } + for (d2 in datanames[-1 * seq.int(d1_ix)]) { if (length(jk[[d1]][[d2]]) == 0) { d2_parent <- parent(jk, d2) d2_pk <- jk[[d2]][[d2]] - fk <- if (identical(d1, d2_parent)) { - # first is parent of second -> parent keys -> first keys - d1_pk - } else if (identical(d1_parent, d2)) { - # second is parent of first -> parent keys -> second keys - d2_pk - } else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) { - # both has the same parent -> common keys to parent - keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) - keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) - - common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) - common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) - - if (all(!common_ix_1)) { - # No common keys between datasets - leave empty - next - } - - structure( - names(keys_d2_parent)[common_ix_2], - names = names(keys_d1_parent)[common_ix_1] - ) - } else { - # cant find connection - leave empty - next - } + if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next + + # both has the same parent -> common keys to parent + keys_d1_parent <- sort(jk[[d1]][[d1_parent]]) + keys_d2_parent <- sort(jk[[d2]][[d2_parent]]) + + common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent) + common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent) + + # No common keys between datasets - leave empty + if (all(!common_ix_1)) next + + fk <- structure( + names(keys_d2_parent)[common_ix_2], + names = names(keys_d1_parent)[common_ix_1] + ) jk[[d1]][[d2]] <- fk # mutate join key duplicate_pairs <- append(duplicate_pairs, paste(d1, d2)) } diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R index b7d57895e..9976d6d56 100644 --- a/tests/testthat/test-join_keys-extract.R +++ b/tests/testthat/test-join_keys-extract.R @@ -260,6 +260,23 @@ testthat::test_that("join_keys[i,j]<- throws when i or j are longer than 1", { testthat::expect_error(my_keys["a", c("a", "b")] <- "new key") }) +testthat::test_that("join_keys[i,j]<- removes keys with NULL", { + my_keys <- join_keys( + join_key("d1", "d1", "A"), + join_key("d2", "d2", "B"), + join_key("d1", "d2", c("A" = "B")) + ) + my_keys["d2", "d1"] <- NULL + + testthat::expect_equal( + my_keys, + join_keys( + join_key("d1", "d1", "A"), + join_key("d2", "d2", "B") + ) + ) +}) + # [[<-.join_keys ------------------------------------------------ testthat::test_that("[[<-.join_keys accepts named list where each containing character", { jk <- join_keys() diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R index e19c58b99..d92f4b10d 100644 --- a/tests/testthat/test-join_keys-parents.R +++ b/tests/testthat/test-join_keys-parents.R @@ -4,6 +4,12 @@ testthat::test_that("parents will return empty list when empty/not set", { testthat::expect_identical(parents(jk), list()) }) +testthat::test_that("parents will return empty list when attribute does not exist", { + jk <- join_keys() + attr(jk, "parents") <- NULL + testthat::expect_identical(parents(jk), list()) +}) + testthat::test_that("parents returns the same list as used in parents<-", { jk <- join_keys(join_key("a", "b", "ab")) parents <- list(b = "a") From a11a66390389eab1d78009405473648a3a19c33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Wed, 13 Dec 2023 10:05:44 +0100 Subject: [PATCH 12/27] fix: revert to base::match.arg suggestion from @chlebowa --- R/join_key.R | 2 +- R/join_keys-extract.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index cdab26fd0..35f4fbdef 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -30,7 +30,7 @@ #' join_key("d1", "d2", c("A" = "B")) #' join_key("d1", "d2", c("A" = "B", "C")) join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset_1", "dataset_2", "none")) { - parent <- checkmate::matchArg(parent, choices = c("dataset_1", "dataset_2", "none"), .var.name = "parent") + parent <- match.arg(parent) checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 3e948bab0..3af6cdc28 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -144,7 +144,7 @@ #' #' jk["ds5", "ds5"] <- NULL `[<-.join_keys` <- function(x, i, j, parent = c("i", "j", "none"), value) { - parent <- checkmate::matchArg(parent, choices = c("i", "j", "none"), .var.name = "parent") + parent <- match.arg(parent) if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { From e731ecd82dc16d97dcee2d46bc39d2a3077ba129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:58:47 +0100 Subject: [PATCH 13/27] Update R/join_key.R MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: kartikeya kirar Signed-off-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- R/join_key.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index 35f4fbdef..b8c00c8c1 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -2,8 +2,9 @@ #' #' @description `r lifecycle::badge("stable")` #' -#' @description `Create a relationship for the variables on a pair of datasets. -#' It is directed by default with `dataset_1` being the parent. +#' @description Create a relationship between two datasets, `dataset_1` and `dataset_2`. +#' By default, this function establishes a directed relationship with `dataset_1` as the parent. +#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`. #' #' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted, #' a primary key for `dataset_1` is created. From fe501458ad5b70e5afe59e7fd132760a62022c9b Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:53 +0000 Subject: [PATCH 14/27] [skip actions] Roxygen Man Pages Auto Update --- man/join_key.Rd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/man/join_key.Rd b/man/join_key.Rd index c3fc99f80..7179e8717 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -34,7 +34,9 @@ object of class \code{join_key_set} to be passed into \code{join_keys} function. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} -\verb{Create a relationship for the variables on a pair of datasets. It is directed by default with }dataset_1` being the parent. +Create a relationship between two datasets, \code{dataset_1} and \code{dataset_2}. +By default, this function establishes a directed relationship with \code{dataset_1} as the parent. +If \code{dataset_2} is not specified, the function creates a primary key for \code{dataset_1}. } \examples{ join_key("d1", "d2", c("A")) From 80b5118f5c4ccba71919f44cf71b8e8843f96fa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:35:53 +0100 Subject: [PATCH 15/27] feat: change parameter from parent to directed --- R/join_key.R | 24 ++++++++++-------------- R/join_keys-extract.R | 18 ++++++------------ R/join_keys.R | 3 ++- man/join_key.Rd | 13 ++++--------- man/join_keys.Rd | 13 +++++++------ tests/testthat/test-join_keys-c.R | 14 +++++++------- tests/testthat/test-join_keys-extract.R | 4 ++-- tests/testthat/test-join_keys-names.R | 6 +++--- tests/testthat/test-join_keys-parents.R | 24 ++++++++++++------------ tests/testthat/test-join_keys-print.R | 10 +++++----- tests/testthat/test-join_keys.R | 17 +++++++++-------- vignettes/join-keys.Rmd | 2 +- 12 files changed, 68 insertions(+), 80 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index b8c00c8c1..bb5750c62 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -16,9 +16,9 @@ #' #' If any element of the `keys` vector is empty with a non-empty name, then the name is #' used for both datasets. -#' @param parent (`character(1)`) Indicates the parent dataset in a parent-child -#' relationship or `none` if it is an undirected relationship. -#' One of `dataset_1`, `dataset_2` or `none`. +#' @param directed (`logical(1)`) Flag that indicates whether `dataset_1` is +#' defined as the parent of `dataset_2` in the relationship. When `FALSE` the +#' relationship becomes undirected. #' #' @return object of class `join_key_set` to be passed into `join_keys` function. #' @@ -30,11 +30,11 @@ #' join_key("d1", "d2", c("A")) #' join_key("d1", "d2", c("A" = "B")) #' join_key("d1", "d2", c("A" = "B", "C")) -join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset_1", "dataset_2", "none")) { - parent <- match.arg(parent) +join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) + checkmate::assert_logical(directed) if (length(keys) > 0) { if (is.null(names(keys))) { @@ -71,15 +71,11 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset keys <- NULL } - if (dataset_1 == dataset_2) { - parent <- "none" - } - - new_parents <- switch(parent, - dataset_1 = structure(list(dataset_1), names = dataset_2), - dataset_2 = structure(list(dataset_2), names = dataset_1), + parents <- if (directed && dataset_1 != dataset_2) { + structure(list(dataset_1), names = dataset_2) + } else { list() - ) + } structure( list( @@ -90,6 +86,6 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, parent = c("dataset ), names = dataset_1, class = "join_key_set", - parents = new_parents + parents = parents ) } diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 3af6cdc28..230121ff2 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -117,9 +117,9 @@ #' @rdname join_keys #' @order 2 #' -#' @param parent (`character(1)`) indicates which dataset is the parent in the -#' relationship or `none` if it is an undirected relationship. One of `i`, -#' `j` or `none`. +#' @param directed (`logical(1)`) Flag that indicates whether `dataset_1` is +#' defined as the parent of `dataset_2` in the relationship. When `FALSE` the +#' relationship becomes undirected. #' #' @section Functions: #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. @@ -143,8 +143,8 @@ #' # Removing a key --- #' #' jk["ds5", "ds5"] <- NULL -`[<-.join_keys` <- function(x, i, j, parent = c("i", "j", "none"), value) { - parent <- match.arg(parent) +`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) { + checkmate::assert_logical(directed) if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { @@ -174,13 +174,7 @@ return(x) } - parent_conversion <- switch(parent, - i = "dataset_1", - j = "dataset_2", - "none" - ) - - c(x, join_key(i, j, value, parent_conversion)) + c(x, join_key(i, j, value, directed)) } #' @noRd diff --git a/R/join_keys.R b/R/join_keys.R index 33fb99e54..3df55e52e 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -26,7 +26,8 @@ #' either `teal_data` or `join_keys` to extract `join_keys`, \cr #' or any number of `join_key_set` objects to create `join_keys`, \cr #' or nothing to create an empty `join_keys` -#' @param value For `x[i, j, parent = "i")] <- value` (named/unnamed `character`) Column mapping between datasets. +#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`) +#' Column mapping between datasets. #' #' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship #' pairs to add to `join_keys` list. diff --git a/man/join_key.Rd b/man/join_key.Rd index 7179e8717..3a50129a6 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -4,12 +4,7 @@ \alias{join_key} \title{Create a relationship between a pair of datasets} \usage{ -join_key( - dataset_1, - dataset_2 = dataset_1, - keys, - parent = c("dataset_1", "dataset_2", "none") -) +join_key(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) } \arguments{ \item{dataset_1, dataset_2}{(\code{character(1)}) Dataset names. When \code{dataset_2} is omitted, @@ -24,9 +19,9 @@ If unnamed, the same column names are used for both datasets. If any element of the \code{keys} vector is empty with a non-empty name, then the name is used for both datasets.} -\item{parent}{(\code{character(1)}) Indicates the parent dataset in a parent-child -relationship or \code{none} if it is an undirected relationship. -One of \code{dataset_1}, \code{dataset_2} or \code{none}.} +\item{directed}{(\code{logical(1)}) Flag that indicates whether \code{dataset_1} is +defined as the parent of \code{dataset_2} in the relationship. When \code{FALSE} the +relationship becomes undirected.} } \value{ object of class \code{join_key_set} to be passed into \code{join_keys} function. diff --git a/man/join_keys.Rd b/man/join_keys.Rd index b43030a34..c1942127b 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -28,7 +28,7 @@ join_keys(...) \method{[}{join_keys}(x, i, j) -\method{[}{join_keys}(x, i, j, parent = c("i", "j", "none")) <- value +\method{[}{join_keys}(x, i, j, directed = TRUE) <- value \method{c}{join_keys}(...) @@ -57,16 +57,17 @@ or \code{join_keys(x) <- value} then it can also take a supported class (\code{t \item{i, j}{indices specifying elements to extract or replace. Index should be a a character vector, but it can also take numeric, logical, \code{NULL} or missing.} -\item{parent}{(\code{character(1)}) indicates which dataset is the parent in the -relationship or \code{none} if it is an undirected relationship. One of \code{i}, -\code{j} or \code{none}.} +\item{directed}{(\code{logical(1)}) Flag that indicates whether \code{dataset_1} is +defined as the parent of \code{dataset_2} in the relationship. When \code{FALSE} the +relationship becomes undirected.} -\item{value}{For \verb{x[i, j, parent = "i")] <- value} (named/unnamed \code{character}) Column mapping between datasets. +\item{value}{For \verb{x[i, j, directed = TRUE)] <- value} (named/unnamed \code{character}) +Column mapping between datasets. For \code{join_keys(x) <- value}: (\code{join_key_set} or list of \code{join_key_set}) relationship pairs to add to \code{join_keys} list. -[i, j, parent = "i")]: R:i,\%20j,\%20parent\%20=\%20\%22i\%22)} +[i, j, directed = TRUE)]: R:i,\%20j,\%20directed\%20=\%20TRUE)} } \value{ \code{join_keys} object. diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R index c3af719a9..6c1451a81 100644 --- a/tests/testthat/test-join_keys-c.R +++ b/tests/testthat/test-join_keys-c.R @@ -125,7 +125,7 @@ testthat::test_that("c.join_key_set merges with empty and non-empty parents", { join_key("d1", "d1", "a"), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d3", "d4", "cd", parent = "none") + join_key("d3", "d4", "cd", directed = FALSE) ) parents(expected) <- list(d4 = "d3") @@ -151,7 +151,7 @@ testthat::test_that("c.join_key_set merges parents also", { expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d1", "d2", "ab", parent = "none"), + join_key("d1", "d2", "ab", directed = FALSE), join_key("d3", "d3", "c") ) parents(expected) <- list(d2 = "d1") @@ -178,10 +178,10 @@ testthat::test_that("c.join_keys merges parents also", { expected <- join_keys( join_key("d1", "d1", "a"), join_key("d2", "d2", "b"), - join_key("d1", "d2", "ab", parent = "none"), + join_key("d1", "d2", "ab", directed = FALSE), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d3", "d4", "cd", parent = "none") + join_key("d3", "d4", "cd", directed = FALSE) ) parents(expected) <- list(d2 = "d1", d4 = "d3") @@ -211,9 +211,9 @@ testthat::test_that("c.join_keys merges existing parents are overwritten", { join_key("d2", "d2", "b"), join_key("d3", "d3", "c"), join_key("d4", "d4", "d"), - join_key("d1", "d2", "ab", parent = "none"), - join_key("d3", "d4", "cd", parent = "none"), - join_key("d2", "d3", "cb", parent = "none") + join_key("d1", "d2", "ab", directed = FALSE), + join_key("d3", "d4", "cd", directed = FALSE), + join_key("d2", "d3", "cb", directed = FALSE) ) parents(expected) <- list(d2 = "d1", d3 = "d2", d4 = "d3") diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R index 9976d6d56..b62d61614 100644 --- a/tests/testthat/test-join_keys-extract.R +++ b/tests/testthat/test-join_keys-extract.R @@ -359,7 +359,7 @@ testthat::test_that("[[<- mutating non-existing keys adds them", { my_keys, join_keys( join_key("d1", "d2", "A"), - join_key("d2", "d3", "B", parent = "none") # [[<- doesn't set parent + join_key("d2", "d3", "B", directed = FALSE) # [[<- doesn't set parent ) ) }) @@ -440,5 +440,5 @@ testthat::test_that("[[<-.join_keys fails when provided foreign key pairs for sa testthat::test_that("[[<-.join_keys allows when provided foreign key pairs for same datasets and same keys", { jk <- join_keys() testthat::expect_silent(jk[["ds1"]] <- list(ds2 = "new", ds2 = c("new" = "new"))) - testthat::expect_equal(jk, join_keys(join_key("ds1", "ds2", "new", parent = "none"))) + testthat::expect_equal(jk, join_keys(join_key("ds1", "ds2", "new", directed = FALSE))) }) diff --git a/tests/testthat/test-join_keys-names.R b/tests/testthat/test-join_keys-names.R index 40f4436ca..82813ca99 100644 --- a/tests/testthat/test-join_keys-names.R +++ b/tests/testthat/test-join_keys-names.R @@ -29,9 +29,9 @@ testthat::test_that("names<-.join_keys will replace names at all levels of the j expected <- join_keys( join_key("a", "a", "a"), - join_key("B", "a", "ba", parent = "none"), - join_key("c", "a", "ca", parent = "none"), - join_key("d", "B", "db", parent = "none") + join_key("B", "a", "ba", directed = FALSE), + join_key("c", "a", "ca", directed = FALSE), + join_key("d", "B", "db", directed = FALSE) ) parents(expected) <- list(B = "a", c = "a", d = "B") diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R index d92f4b10d..c018ae22d 100644 --- a/tests/testthat/test-join_keys-parents.R +++ b/tests/testthat/test-join_keys-parents.R @@ -25,8 +25,8 @@ testthat::test_that("parents<- accepts a named list containing (non-empty, non-m testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { jk <- join_keys( - join_key("a", "b", "ab", parent = "none"), - join_key("c", "d", "cd", parent = "none") + join_key("a", "b", "ab", directed = FALSE), + join_key("c", "d", "cd", directed = FALSE) ) parents(jk)[["a"]] <- "b" parents(jk)[["c"]] <- "d" @@ -35,25 +35,25 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct testthat::test_that("parents<- dataset can't be own parent", { jk <- join_keys( - join_key("a", "b", "ab", parent = "none"), - join_key("c", "d", "cd", parent = "none") + join_key("a", "b", "ab", directed = FALSE), + join_key("c", "d", "cd", directed = FALSE) ) testthat::expect_error(parents(jk) <- list(a = "a")) }) testthat::test_that("parents<- setting parent-child relationship fails when no foreign keys between datasets", { jk <- join_keys( - join_key("a", "1", "aa", parent = "none"), - join_key("b", "b", "bb", parent = "none") + join_key("a", "1", "aa", directed = FALSE), + join_key("b", "b", "bb", directed = FALSE) ) testthat::expect_error(parents(jk) <- list(a = "b")) }) testthat::test_that("parents<- ensures it is a directed acyclical graph (DAG)", { cyclic_jk <- join_keys( - join_key("a", "b", "id", parent = "none"), - join_key("b", "c", "id", parent = "none"), - join_key("c", "a", "id", parent = "none") + join_key("a", "b", "id", directed = FALSE), + join_key("b", "c", "id", directed = FALSE), + join_key("c", "a", "id", directed = FALSE) ) testthat::expect_error( parents(cyclic_jk) <- list(a = "b", b = "c", c = "a"), @@ -63,8 +63,8 @@ testthat::test_that("parents<- ensures it is a directed acyclical graph (DAG)", testthat::test_that("parents<- single parent can be changed utilizing list functionality with [[<-", { jk <- join_keys( - join_key("a", "b", "ab", parent = "none"), - join_key("c", "d", "cd", parent = "none") + join_key("a", "b", "ab", directed = FALSE), + join_key("c", "d", "cd", directed = FALSE) ) parents(jk)[["a"]] <- "b" parents(jk)[["c"]] <- "d" @@ -73,7 +73,7 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct }) testthat::test_that("parents<- fails when value isn't a list (non-empty, non-missing) character", { - jk <- join_keys(join_key("a", "b", "test", parent = "none")) + jk <- join_keys(join_key("a", "b", "test", directed = FALSE)) testthat::expect_error(parents(jk) <- list(b = 1), "May only contain the following types") testthat::expect_error(parents(jk) <- list(b = NA_character_), "May not contain") testthat::expect_error(parents(jk) <- list(b = NULL), "May only contain the following types") diff --git a/tests/testthat/test-join_keys-print.R b/tests/testthat/test-join_keys-print.R index c4bb76a39..f9aef2d6d 100644 --- a/tests/testthat/test-join_keys-print.R +++ b/tests/testthat/test-join_keys-print.R @@ -5,11 +5,11 @@ testthat::test_that("format.join_keys for empty set", { testthat::test_that("format.join_keys with empty parents", { my_keys <- join_keys( - join_key("d1", "d1", "a", parent = "none"), - join_key("d2", "d2", "b", parent = "none"), - join_key("d3", "d3", "c", parent = "none"), - join_key("d1", "d2", "ab", parent = "none"), - join_key("d2", "d3", "ac", parent = "none") + join_key("d1", "d1", "a", directed = FALSE), + join_key("d2", "d2", "b", directed = FALSE), + join_key("d3", "d3", "c", directed = FALSE), + join_key("d1", "d2", "ab", directed = FALSE), + join_key("d2", "d3", "ac", directed = FALSE) ) testthat::expect_identical( format(my_keys), diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R index 1757b868b..d849cc0d7 100644 --- a/tests/testthat/test-join_keys.R +++ b/tests/testthat/test-join_keys.R @@ -76,7 +76,7 @@ testthat::test_that("join_keys accepts duplicated join_key", { testthat::test_that("join_keys accepts duplicated join_key (undirected)", { testthat::expect_no_error( - join_keys(join_key("d1", "d2", "a", parent = "none"), join_key("d1", "d2", "a", parent = "none")) + join_keys(join_key("d1", "d2", "a", directed = FALSE), join_key("d1", "d2", "a", directed = FALSE)) ) }) @@ -114,20 +114,21 @@ testthat::test_that("join_keys fails when provided foreign key pairs have incomp testthat::test_that("join_keys constructor adds symmetric keys on given (unnamed) foreign key", { my_keys <- join_keys(join_key("d1", "d2", "a")) - testthat::expect_equal( - my_keys, - join_keys(join_key("d2", "d1", "a", parent = "dataset_2")) - ) + expected_keys <- join_keys(join_key("d2", "d1", "a", directed = FALSE)) + parents(expected_keys) <- list(d2 = "d1") + + testthat::expect_equal(my_keys, expected_keys) }) testthat::test_that("join_keys constructor adds symmetric keys on given (named) foreign key", { + expected_keys <- join_keys(join_key("d2", "d1", c(b = "a"), directed = FALSE)) + parents(expected_keys) <- list(d2 = "d1") + testthat::expect_equal( join_keys( join_key("d1", "d2", c(a = "b")) ), - join_keys( - join_key("d2", "d1", c(b = "a"), parent = "dataset_2") - ) + expected_keys ) }) diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index 00cf4f264..b25b5eadf 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -74,7 +74,7 @@ jk <- join_keys( join_key("ds3", keys = c("col_1", "col_3")), # ds3: [col_1, col_3] join_key("ds1", "ds2", keys = "col_1"), # ds1 <-- ds2 join_key("ds1", "ds3", keys = "col_1"), # ds1 <-- ds3 - join_key("ds4", "ds5", keys = c("col_4" = "col_5"), parent = "none") # ds4 <--> ds5 + join_key("ds4", "ds5", keys = c("col_4" = "col_5"), directed = FALSE) # ds4 <--> ds5 ) # The parent-child relationships are created automatically (unless 'parent' parameter is "none") From 886fa72efd335054adb6ca8cd60360bc17a8d2ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:29:27 +0100 Subject: [PATCH 16/27] docs: rephrase parameter --- R/join_key.R | 7 ++++--- R/join_keys-extract.R | 8 ++++---- man/join_key.Rd | 9 ++++++--- man/join_keys.Rd | 9 ++++++--- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index bb5750c62..8d49f8ff8 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -16,9 +16,10 @@ #' #' If any element of the `keys` vector is empty with a non-empty name, then the name is #' used for both datasets. -#' @param directed (`logical(1)`) Flag that indicates whether `dataset_1` is -#' defined as the parent of `dataset_2` in the relationship. When `FALSE` the -#' relationship becomes undirected. +#' @param directed (`logical(1)`) Flag that indicates whether it should create +#' a parent-child relationship between the datasets.\cr +#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; +#' - `FALSE` when the relationship is undirected. #' #' @return object of class `join_key_set` to be passed into `join_keys` function. #' diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 230121ff2..9d599b3d0 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -117,10 +117,10 @@ #' @rdname join_keys #' @order 2 #' -#' @param directed (`logical(1)`) Flag that indicates whether `dataset_1` is -#' defined as the parent of `dataset_2` in the relationship. When `FALSE` the -#' relationship becomes undirected. -#' +#' @param directed (`logical(1)`) Flag that indicates whether it should create +#' a parent-child relationship between the datasets.\cr +#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`; +#' - `FALSE` when the relationship is undirected. #' @section Functions: #' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`. #' - `x[i] <- value`: This (without `j` parameter) **is not** a supported diff --git a/man/join_key.Rd b/man/join_key.Rd index 3a50129a6..012cca15f 100644 --- a/man/join_key.Rd +++ b/man/join_key.Rd @@ -19,9 +19,12 @@ If unnamed, the same column names are used for both datasets. If any element of the \code{keys} vector is empty with a non-empty name, then the name is used for both datasets.} -\item{directed}{(\code{logical(1)}) Flag that indicates whether \code{dataset_1} is -defined as the parent of \code{dataset_2} in the relationship. When \code{FALSE} the -relationship becomes undirected.} +\item{directed}{(\code{logical(1)}) Flag that indicates whether it should create +a parent-child relationship between the datasets.\cr +\itemize{ +\item \code{TRUE} (default) \code{dataset_1} is the parent of \code{dataset_2}; +\item \code{FALSE} when the relationship is undirected. +}} } \value{ object of class \code{join_key_set} to be passed into \code{join_keys} function. diff --git a/man/join_keys.Rd b/man/join_keys.Rd index c1942127b..1eb7ae167 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -57,9 +57,12 @@ or \code{join_keys(x) <- value} then it can also take a supported class (\code{t \item{i, j}{indices specifying elements to extract or replace. Index should be a a character vector, but it can also take numeric, logical, \code{NULL} or missing.} -\item{directed}{(\code{logical(1)}) Flag that indicates whether \code{dataset_1} is -defined as the parent of \code{dataset_2} in the relationship. When \code{FALSE} the -relationship becomes undirected.} +\item{directed}{(\code{logical(1)}) Flag that indicates whether it should create +a parent-child relationship between the datasets.\cr +\itemize{ +\item \code{TRUE} (default) \code{dataset_1} is the parent of \code{dataset_2}; +\item \code{FALSE} when the relationship is undirected. +}} \item{value}{For \verb{x[i, j, directed = TRUE)] <- value} (named/unnamed \code{character}) Column mapping between datasets. From b1e62fe5cbf475af1842dbc7c8db9d189d9b1c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Wed, 20 Dec 2023 12:15:40 +0100 Subject: [PATCH 17/27] Apply suggestions from @kartikeyakirar and @gogonzo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: kartikeya kirar Co-authored-by: Dawid Kałędkowski <6959016+gogonzo@users.noreply.github.com> Signed-off-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- R/join_key.R | 4 ++-- R/join_keys-extract.R | 2 +- R/join_keys-parents.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index 8d49f8ff8..6ee197f3c 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -35,7 +35,7 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { checkmate::assert_string(dataset_1) checkmate::assert_string(dataset_2) checkmate::assert_character(keys, any.missing = FALSE) - checkmate::assert_logical(directed) + checkmate::assert_flag(directed) if (length(keys) > 0) { if (is.null(names(keys))) { @@ -73,7 +73,7 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { } parents <- if (directed && dataset_1 != dataset_2) { - structure(list(dataset_1), names = dataset_2) + setNames(list(dataset_1), dataset_2) } else { list() } diff --git a/R/join_keys-extract.R b/R/join_keys-extract.R index 9d599b3d0..987b8ef9d 100644 --- a/R/join_keys-extract.R +++ b/R/join_keys-extract.R @@ -144,7 +144,7 @@ #' #' jk["ds5", "ds5"] <- NULL `[<-.join_keys` <- function(x, i, j, directed = TRUE, value) { - checkmate::assert_logical(directed) + checkmate::assert_flag(directed) if (missing(i) || missing(j)) { stop("join_keys[i, j] specify both indices to set a key pair.") } else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) { diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R index 796a47dbd..33dddbc9e 100644 --- a/R/join_keys-parents.R +++ b/R/join_keys-parents.R @@ -73,7 +73,7 @@ parents.teal_data <- function(x) { #' parents(jk)["ds6"] <- "ds5" #' parents(jk)["ds7"] <- "ds6" `parents<-.join_keys` <- function(x, value) { - checkmate::assert_list(value, types = c("character"), names = "named") + checkmate::assert_list(value, types = "character", names = "named") new_parents <- list() From 6ea3a14a59789c6e476fed4ff2104b4505af66f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Wed, 20 Dec 2023 16:47:48 +0100 Subject: [PATCH 18/27] Apply suggestions from @chlebowa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Aleksander Chlebowski <114988527+chlebowa@users.noreply.github.com> Signed-off-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- R/join_key.R | 2 +- R/join_keys-parents.R | 2 +- R/join_keys.R | 2 +- vignettes/join-keys.Rmd | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/join_key.R b/R/join_key.R index 6ee197f3c..b9b3f9b89 100644 --- a/R/join_key.R +++ b/R/join_key.R @@ -73,7 +73,7 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) { } parents <- if (directed && dataset_1 != dataset_2) { - setNames(list(dataset_1), dataset_2) + stats::setNames(list(dataset_1), dataset_2) } else { list() } diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R index 33dddbc9e..bd696e9d5 100644 --- a/R/join_keys-parents.R +++ b/R/join_keys-parents.R @@ -79,7 +79,7 @@ parents.teal_data <- function(x) { for (dataset in names(value)) { if (checkmate::test_scalar_na(value[[dataset]])) { - checkmate::assert("May not contain `NA_character_`", .var.name = "value") + checkmate::assert("Must not contain `NA_character_`", .var.name = "value") } parent <- new_parents[[dataset]] diff --git a/R/join_keys.R b/R/join_keys.R index 3df55e52e..a48c8452e 100644 --- a/R/join_keys.R +++ b/R/join_keys.R @@ -7,7 +7,7 @@ #' #' @description #' Facilitates the creation and retrieval of relationships between datasets. -#' `join_keys` class extends a list and contains keys connecting pairs of datasets. +#' `join_keys` class extends `list` and contains keys connecting pairs of datasets. #' Each element of the list contains keys for specific dataset. #' Each dataset can have a relationship with itself (primary key) and with other datasets. #' diff --git a/vignettes/join-keys.Rmd b/vignettes/join-keys.Rmd index b25b5eadf..662b13104 100644 --- a/vignettes/join-keys.Rmd +++ b/vignettes/join-keys.Rmd @@ -238,8 +238,8 @@ join_keys(td_fk) Two datasets that share common _foreign_ keys to the same _parent_ dataset have an implicit relationship between them that is modeled and accessible in _joining keys_. -This is a special inferred relationship from existing foreign keys that does not need to be explicitly defined and can be seamlessly accessible just as any other foreign key. -As any other foreign key they can be overwritten. +This is a special relationship that is inferred from existing foreign keys. +It does not need to be explicitly defined but it can be accessed and overwritten just as any other foreign key. These implicit relationships can be used to merge 2 datasets together, just as if they were defined manually. From 9249bdd1169b1ed6aa69480a34aaac92f18bbaa4 Mon Sep 17 00:00:00 2001 From: "27856297+dependabot-preview[bot]@users.noreply.github.com" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Wed, 20 Dec 2023 15:49:48 +0000 Subject: [PATCH 19/27] [skip actions] Roxygen Man Pages Auto Update --- man/join_keys.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/join_keys.Rd b/man/join_keys.Rd index 1eb7ae167..c3c2bde37 100644 --- a/man/join_keys.Rd +++ b/man/join_keys.Rd @@ -77,7 +77,7 @@ pairs to add to \code{join_keys} list. } \description{ Facilitates the creation and retrieval of relationships between datasets. -\code{join_keys} class extends a list and contains keys connecting pairs of datasets. +\code{join_keys} class extends \code{list} and contains keys connecting pairs of datasets. Each element of the list contains keys for specific dataset. Each dataset can have a relationship with itself (primary key) and with other datasets. From 62aac3b007834e1d83f73957ec85d9f5b9a62d05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Wed, 20 Dec 2023 16:02:19 +0000 Subject: [PATCH 20/27] fix: remove unused variable --- R/join_keys-utils.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/join_keys-utils.R b/R/join_keys-utils.R index 4939d99bc..b6bca40bf 100644 --- a/R/join_keys-utils.R +++ b/R/join_keys-utils.R @@ -110,7 +110,6 @@ update_keys_given_parents <- function(x) { checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x)) datanames <- names(jk) - duplicate_pairs <- list() for (d1_ix in seq_along(datanames)) { d1 <- datanames[[d1_ix]] d1_pk <- jk[[d1]][[d1]] @@ -137,7 +136,6 @@ update_keys_given_parents <- function(x) { names = names(keys_d1_parent)[common_ix_1] ) jk[[d1]][[d2]] <- fk # mutate join key - duplicate_pairs <- append(duplicate_pairs, paste(d1, d2)) } } } From ba0ef66a2aa23860680c95ed92bad3a7b4ce01c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Wed, 20 Dec 2023 16:09:06 +0000 Subject: [PATCH 21/27] docs: improve on assertion --- R/join_keys-parents.R | 8 ++++---- tests/testthat/test-join_keys-parents.R | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/join_keys-parents.R b/R/join_keys-parents.R index bd696e9d5..7ae70b68b 100644 --- a/R/join_keys-parents.R +++ b/R/join_keys-parents.R @@ -78,9 +78,8 @@ parents.teal_data <- function(x) { new_parents <- list() for (dataset in names(value)) { - if (checkmate::test_scalar_na(value[[dataset]])) { - checkmate::assert("Must not contain `NA_character_`", .var.name = "value") - } + # Custom .var.name so it is verbose and helpful for users + checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset)) parent <- new_parents[[dataset]] checkmate::assert( @@ -90,7 +89,8 @@ parents.teal_data <- function(x) { length(value[[dataset]]) == 0 ), checkmate::check_true(parent == value[[dataset]]), - "Please check the difference between provided datasets parents and provided join_keys parents." + "Please check the difference between provided datasets parents and provided join_keys parents.", + .var.name = "value" ) if (is.null(parent)) { new_parents[[dataset]] <- value[[dataset]] diff --git a/tests/testthat/test-join_keys-parents.R b/tests/testthat/test-join_keys-parents.R index c018ae22d..d2c8e9f86 100644 --- a/tests/testthat/test-join_keys-parents.R +++ b/tests/testthat/test-join_keys-parents.R @@ -75,7 +75,7 @@ testthat::test_that("parents<- single parent can be changed utilizing list funct testthat::test_that("parents<- fails when value isn't a list (non-empty, non-missing) character", { jk <- join_keys(join_key("a", "b", "test", directed = FALSE)) testthat::expect_error(parents(jk) <- list(b = 1), "May only contain the following types") - testthat::expect_error(parents(jk) <- list(b = NA_character_), "May not contain") + testthat::expect_error(parents(jk) <- list(b = NA_character_), "May not be NA") testthat::expect_error(parents(jk) <- list(b = NULL), "May only contain the following types") testthat::expect_error(parents(jk) <- NULL, "Must be of type 'list'") }) From ddd729ef944cf17a78773c3f832391026c5f804a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:15:34 +0100 Subject: [PATCH 22/27] Remove rendundant code from @gogonzo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dawid Kałędkowski <6959016+gogonzo@users.noreply.github.com> Signed-off-by: André Veríssimo <211358+averissimo@users.noreply.github.com> --- tests/testthat/test-join_keys-extract.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/testthat/test-join_keys-extract.R b/tests/testthat/test-join_keys-extract.R index b62d61614..d7e21c40e 100644 --- a/tests/testthat/test-join_keys-extract.R +++ b/tests/testthat/test-join_keys-extract.R @@ -400,8 +400,6 @@ testthat::test_that("[[<-.join_keys removes keys with NULL and applies symmetric ) my_keys[["d1"]][["d2"]] <- NULL - expect_null(my_keys["d1", "d2"]) - expect_null(my_keys["d2", "d1"]) expect_equal( my_keys, From a6c9272a44233f3b1cce92dffb39356cab63e600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 21 Dec 2023 16:20:27 +0000 Subject: [PATCH 23/27] fix: removed redundant test --- tests/testthat/test-join_keys-c.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R index 6c1451a81..2c68d0779 100644 --- a/tests/testthat/test-join_keys-c.R +++ b/tests/testthat/test-join_keys-c.R @@ -129,13 +129,8 @@ testthat::test_that("c.join_key_set merges with empty and non-empty parents", { ) parents(expected) <- list(d4 = "d3") - testthat::expect_identical( - c(jk1, jk2), - expected - ) - testthat::expect_equal( - c(jk2, jk1), + c(jk1, jk2), expected ) }) @@ -157,7 +152,7 @@ testthat::test_that("c.join_key_set merges parents also", { parents(expected) <- list(d2 = "d1") testthat::expect_equal( - c(jk2, jk1), + c(jk1, jk2), expected ) }) From b3177082caff4e250e8ddab2384303283e1217f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:03:01 +0000 Subject: [PATCH 24/27] fix: allow incompatible keys on constructor and c --- R/join_keys-c.R | 14 ++------------ tests/testthat/test-join_keys-c.R | 32 ------------------------------- tests/testthat/test-join_keys.R | 23 ---------------------- 3 files changed, 2 insertions(+), 67 deletions(-) diff --git a/R/join_keys-c.R b/R/join_keys-c.R index b0cc7a9f5..5af569249 100644 --- a/R/join_keys-c.R +++ b/R/join_keys-c.R @@ -14,28 +14,18 @@ #' ) #' ) c.join_keys <- function(...) { - join_keys_obj <- rlang::list2(...)[[1]] - x <- rlang::list2(...)[-1] - checkmate::assert_multi_class(join_keys_obj, classes = c("join_keys", "join_key_set")) + x <- rlang::list2(...) checkmate::assert_list(x, types = c("join_keys", "join_key_set")) - # Ensure base object has correct class when called from c.join_key_set - join_keys_obj <- join_keys(join_keys_obj) - - x_merged <- Reduce( + Reduce( init = join_keys(), x = x, f = function(.x, .y) { - assert_compatible_keys2(.x, .y) out <- utils::modifyList(.x, .y, keep.null = FALSE) parents(out) <- .merge_parents(.x, .y) out } ) - - out <- utils::modifyList(join_keys_obj, x_merged, keep.null = FALSE) - parents(out) <- .merge_parents(join_keys_obj, x_merged) - out } #' @rdname join_keys diff --git a/tests/testthat/test-join_keys-c.R b/tests/testthat/test-join_keys-c.R index 2c68d0779..bbb503e0e 100644 --- a/tests/testthat/test-join_keys-c.R +++ b/tests/testthat/test-join_keys-c.R @@ -78,38 +78,6 @@ testthat::test_that("c.join_keys doesn't throw when second object is empty join_ testthat::expect_no_error(c(x, y)) }) -testthat::test_that("c.join_keys throws on conflicting join_keys_set objects", { - obj <- join_keys() - testthat::expect_error( - c( - obj, - join_keys(join_key("a", "b", "aa")), - join_keys(join_key("b", "a", "bb")) - ), - "cannot specify multiple different join keys between datasets" - ) - - testthat::expect_error( - c( - obj, - join_key("a", "b", "aa"), - join_key("b", "a", "bb") - ), - "cannot specify multiple different join keys between datasets" - ) -}) - -testthat::test_that("c.join_key_set throws on conflicting join_keys_set objects", { - testthat::expect_error( - c( - join_key("a", "b", "aa"), - join_key("a", "b", "ca"), - join_key("a", "b", "cc") - ), - "cannot specify multiple different join keys between datasets" - ) -}) - testthat::test_that("c.join_key_set merges with empty and non-empty parents", { jk1 <- join_keys( join_key("d1", "d1", "a") diff --git a/tests/testthat/test-join_keys.R b/tests/testthat/test-join_keys.R index d849cc0d7..cb305947f 100644 --- a/tests/testthat/test-join_keys.R +++ b/tests/testthat/test-join_keys.R @@ -89,29 +89,6 @@ testthat::test_that("join_keys doesn't accept a list which is identical to outpu testthat::expect_error(join_keys(unclass(key))) }) -testthat::test_that("join_keys fails when provided foreign key pairs have incompatible values", { - testthat::expect_error( - join_keys(join_key("d1", "d2", "a"), join_key("d2", "d1", "b")), - "cannot specify multiple different join keys between datasets" - ) - testthat::expect_error( - join_keys(join_key("d1", "d2", c(a = "b")), join_key("d2", "d1", c(a = "b"))), - "cannot specify multiple different join keys between datasets" - ) - - testthat::expect_error( - join_keys( - join_keys( - join_key("q", "b", "d"), - join_key("a", "b", "c") - ), - join_key("a", "q", "e"), - join_key("a", "b", "f") - ), - "cannot specify multiple different join keys between datasets" - ) -}) - testthat::test_that("join_keys constructor adds symmetric keys on given (unnamed) foreign key", { my_keys <- join_keys(join_key("d1", "d2", "a")) expected_keys <- join_keys(join_key("d2", "d1", "a", directed = FALSE)) From 78d4240ad51921bc35e2ed9b1140707d3e2b1e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:06:00 +0000 Subject: [PATCH 25/27] fix: remove .merge_parents are there are better protections in place --- R/join_keys-c.R | 20 +------------------- man/dot-merge_parents.Rd | 18 ------------------ 2 files changed, 1 insertion(+), 37 deletions(-) delete mode 100644 man/dot-merge_parents.Rd diff --git a/R/join_keys-c.R b/R/join_keys-c.R index 5af569249..70167b1fb 100644 --- a/R/join_keys-c.R +++ b/R/join_keys-c.R @@ -22,7 +22,7 @@ c.join_keys <- function(...) { x = x, f = function(.x, .y) { out <- utils::modifyList(.x, .y, keep.null = FALSE) - parents(out) <- .merge_parents(.x, .y) + parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE) out } ) @@ -45,21 +45,3 @@ c.join_keys <- function(...) { c.join_key_set <- function(...) { c.join_keys(...) } - -#' Merge parents for 2 `join_keys` object -#' -#' @param x,y (`join_keys`) objects to merge their parents -#' -#' @return a list with parents merged from 2 `join_keys`. Not the object itself. -#' @keywords internal -.merge_parents <- function(x, y) { - x_parent <- list() - y_parent <- list() - if (length(attr(x, "parents"))) { - x_parent <- attr(x, "parents") - } - if (length(attr(y, "parents"))) { - y_parent <- attr(y, "parents") - } - utils::modifyList(x_parent, y_parent, keep.null = FALSE) -} diff --git a/man/dot-merge_parents.Rd b/man/dot-merge_parents.Rd deleted file mode 100644 index f558f28e2..000000000 --- a/man/dot-merge_parents.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_keys-c.R -\name{.merge_parents} -\alias{.merge_parents} -\title{Merge parents for 2 \code{join_keys} object} -\usage{ -.merge_parents(x, y) -} -\arguments{ -\item{x, y}{(\code{join_keys}) objects to merge their parents} -} -\value{ -a list with parents merged from 2 \code{join_keys}. Not the object itself. -} -\description{ -Merge parents for 2 \code{join_keys} object -} -\keyword{internal} From 79478789f9b30e2669c8866f0bbdda8e16dad3c8 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Dec 2023 12:38:07 +0530 Subject: [PATCH 26/27] removed unused variable. --- R/join_keys-print.R | 1 - R/join_keys-utils.R | 2 -- 2 files changed, 3 deletions(-) diff --git a/R/join_keys-print.R b/R/join_keys-print.R index f39fef86d..ad231fd78 100644 --- a/R/join_keys-print.R +++ b/R/join_keys-print.R @@ -8,7 +8,6 @@ format.join_keys <- function(x, ...) { names <- union(names_sorted, names(x)) x_implicit <- update_keys_given_parents(x) out <- lapply(names, function(i) { - this_parent <- my_parents[[i]] out_i <- lapply(union(i, names(x[[i]])), function(j) { direction <- if (identical(my_parents[[j]], i)) { " <-- " diff --git a/R/join_keys-utils.R b/R/join_keys-utils.R index b6bca40bf..0696610bd 100644 --- a/R/join_keys-utils.R +++ b/R/join_keys-utils.R @@ -112,12 +112,10 @@ update_keys_given_parents <- function(x) { datanames <- names(jk) for (d1_ix in seq_along(datanames)) { d1 <- datanames[[d1_ix]] - d1_pk <- jk[[d1]][[d1]] d1_parent <- parent(jk, d1) for (d2 in datanames[-1 * seq.int(d1_ix)]) { if (length(jk[[d1]][[d2]]) == 0) { d2_parent <- parent(jk, d2) - d2_pk <- jk[[d2]][[d2]] if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next From 86529bec4c928d58882d6279f6df448b89618dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ver=C3=ADssimo?= <211358+averissimo@users.noreply.github.com> Date: Fri, 22 Dec 2023 10:10:10 +0000 Subject: [PATCH 27/27] empty commit