Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
497435d
feat: initial support
averissimo Dec 8, 2023
1b3f662
feat: use match.arg and keep checkmate error
averissimo Dec 11, 2023
a781284
fix: remove redundant line
averissimo Dec 11, 2023
4039252
Merge branch 'main' into 203_default_parents@main
averissimo Dec 11, 2023
9e5abc8
feat: protect against cyclical graphs on const./merge
averissimo Dec 11, 2023
8e5136d
docs: initial pass on vignette
averissimo Dec 11, 2023
6c01c19
fix: correcting check warnings
averissimo Dec 11, 2023
ee0a073
docs: reflecting new API
averissimo Dec 12, 2023
b05cf02
docs: minor changes
averissimo Dec 12, 2023
c7f6c89
tests: better support for default parent behavior
averissimo Dec 12, 2023
8a09b00
lint: adds nolint (hidden) block in vignette
averissimo Dec 12, 2023
a293fb0
feat: remove unecessary iterations and adds coverage
averissimo Dec 12, 2023
3f0f7a6
Merge branch 'main' into 203_default_parents@main
averissimo Dec 12, 2023
a11a663
fix: revert to base::match.arg suggestion from @chlebowa
averissimo Dec 13, 2023
e731ecd
Update R/join_key.R
averissimo Dec 14, 2023
fe50145
[skip actions] Roxygen Man Pages Auto Update
dependabot-preview[bot] Dec 14, 2023
80b5118
feat: change parameter from parent to directed
averissimo Dec 14, 2023
efb3cf6
Merge branch 'main' into 203_default_parents@main
averissimo Dec 14, 2023
886fa72
docs: rephrase parameter
averissimo Dec 14, 2023
edc6ffa
Merge branch 'main' into 203_default_parents@main
averissimo Dec 15, 2023
b1e62fe
Apply suggestions from @kartikeyakirar and @gogonzo
averissimo Dec 20, 2023
5aa0d43
Merge branch 'main' into 203_default_parents@main
averissimo Dec 20, 2023
6ea3a14
Apply suggestions from @chlebowa
averissimo Dec 20, 2023
9249bdd
[skip actions] Roxygen Man Pages Auto Update
dependabot-preview[bot] Dec 20, 2023
62aac3b
fix: remove unused variable
averissimo Dec 20, 2023
ba0ef66
docs: improve on assertion
averissimo Dec 20, 2023
ddd729e
Remove rendundant code from @gogonzo
averissimo Dec 21, 2023
a6c9272
fix: removed redundant test
averissimo Dec 21, 2023
b317708
fix: allow incompatible keys on constructor and c
averissimo Dec 21, 2023
78d4240
fix: remove .merge_parents are there are better protections in place
averissimo Dec 21, 2023
1cf0632
Merge branch 'main' into 203_default_parents@main
kartikeyakirar Dec 22, 2023
7947878
removed unused variable.
kartikeyakirar Dec 22, 2023
74925c5
Merge branch 'main' into 203_default_parents@main
averissimo Dec 22, 2023
86529be
empty commit
averissimo Dec 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions R/join_key.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,40 @@
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @details `join_key()` will create a relationship for the variables on a pair
#' of datasets.
#' @description Create a relationship between two datasets, `dataset_1` and `dataset_2`.
#' By default, this function establishes a directed relationship with `dataset_1` as the parent.
#' If `dataset_2` is not specified, the function creates a primary key for `dataset_1`.
#'
#' @param dataset_1,dataset_2 (`character(1)`) dataset names. If `dataset_2` is omitted,
#' a primary key for `dataset_1` is created.
#' @param keys (optionally named `character`) where `names(keys)` are columns in `dataset_1`
#' corresponding to columns of `dataset_2` given by the elements of `keys`.
#' @param dataset_1,dataset_2 (`character(1)`) Dataset names. When `dataset_2` is omitted,
#' a primary key for `dataset_1` is created.
#' @param keys (optionally named `character`) Column mapping between the datasets,
#' where `names(keys)` maps columns in `dataset_1` corresponding to columns of
#' `dataset_2` given by the elements of `keys`.
#'
#' If unnamed, the same column names are used for both datasets.
#'
#' If any element of the `keys` vector is empty with a non-empty name, then the name is
#' used for both datasets.
#' @param directed (`logical(1)`) Flag that indicates whether it should create
#' a parent-child relationship between the datasets.\cr
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
#' - `FALSE` when the relationship is undirected.
#'
#' @return object of class `join_key_set` to be passed into `join_keys` function.
#'
#' @seealso [join_keys()]
#' @seealso [join_keys()], [parents()]
#'
#' @export
#'
#' @examples
#' join_key("d1", "d2", c("A"))
#' join_key("d1", "d2", c("A" = "B"))
#' join_key("d1", "d2", c("A" = "B", "C"))
join_key <- function(dataset_1, dataset_2 = dataset_1, keys) {
join_key <- function(dataset_1, dataset_2 = dataset_1, keys, directed = TRUE) {
checkmate::assert_string(dataset_1)
checkmate::assert_string(dataset_2)
checkmate::assert_character(keys, any.missing = FALSE)
checkmate::assert_flag(directed)

if (length(keys) > 0) {
if (is.null(names(keys))) {
Expand Down Expand Up @@ -65,6 +72,12 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys) {
keys <- NULL
}

parents <- if (directed && dataset_1 != dataset_2) {
stats::setNames(list(dataset_1), dataset_2)
} else {
list()
}

structure(
list(
structure(
Expand All @@ -73,6 +86,7 @@ join_key <- function(dataset_1, dataset_2 = dataset_1, keys) {
)
),
names = dataset_1,
class = "join_key_set"
class = "join_key_set",
parents = parents
)
}
38 changes: 5 additions & 33 deletions R/join_keys-c.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,22 @@
#' jk,
#' join_keys(
#' join_key("ds4", keys = c("pk4", "pk4_2")),
#' join_key("ds4", "ds3", c(pk4_2 = "pk3"))
#' join_key("ds3", "ds4", c(pk3 = "pk4_2"))
#' )
#' )
c.join_keys <- function(...) {
join_keys_obj <- rlang::list2(...)[[1]]
x <- rlang::list2(...)[-1]
checkmate::assert_multi_class(join_keys_obj, classes = c("join_keys", "join_key_set"))
x <- rlang::list2(...)
checkmate::assert_list(x, types = c("join_keys", "join_key_set"))

# Ensure base object has correct class when called from c.join_key_set
join_keys_obj <- join_keys(join_keys_obj)

x_merged <- Reduce(
Reduce(
init = join_keys(),
x = x,
f = function(.x, .y) {
assert_compatible_keys2(.x, .y)
out <- utils::modifyList(.x, .y, keep.null = FALSE)
attr(out, "parents") <- .merge_parents(.x, .y)
parents(out) <- utils::modifyList(attr(.x, "parents"), attr(.y, "parents"), keep.null = FALSE)
out
}
)

out <- utils::modifyList(join_keys_obj, x_merged, keep.null = FALSE)
attr(out, "parents") <- .merge_parents(join_keys_obj, x_merged)
out
}

#' @rdname join_keys
Expand All @@ -50,26 +40,8 @@ c.join_keys <- function(...) {
#' jk_merged <- c(
#' jk_merged,
#' join_key("ds5", keys = "pk5"),
#' join_key("ds5", "ds1", c(pk5 = "pk1"))
#' join_key("ds1", "ds5", c(pk1 = "pk5"))
#' )
c.join_key_set <- function(...) {
c.join_keys(...)
}

#' Merge parents for 2 `join_keys` object
#'
#' @param x,y (`join_keys`) objects to merge their parents
#'
#' @return a list with parents merged from 2 `join_keys`. Not the object itself.
#' @keywords internal
.merge_parents <- function(x, y) {
x_parent <- list()
y_parent <- list()
if (length(attr(x, "parents"))) {
x_parent <- attr(x, "parents")
}
if (length(attr(y, "parents"))) {
y_parent <- attr(y, "parents")
}
utils::modifyList(x_parent, y_parent, keep.null = FALSE)
}
27 changes: 21 additions & 6 deletions R/join_keys-extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@
#' @rdname join_keys
#' @order 2
#'
#' @param directed (`logical(1)`) Flag that indicates whether it should create
#' a parent-child relationship between the datasets.\cr
#' - `TRUE` (default) `dataset_1` is the parent of `dataset_2`;
#' - `FALSE` when the relationship is undirected.
#' @section Functions:
#' - `x[i, j] <- value`: Assignment of a key to pair `(i, j)`.
#' - `x[i] <- value`: This (without `j` parameter) **is not** a supported
Expand All @@ -134,12 +138,13 @@
#'
#' # Setting a single relationship pair ---
#'
#' jk["ds4", "ds1"] <- c("pk4" = "pk1")
#' jk["ds1", "ds4"] <- c("pk1" = "pk4")
#'
#' # Removing a key ---
#'
#' jk["ds5", "ds5"] <- NULL
`[<-.join_keys` <- function(x, i, j, value) {
`[<-.join_keys` <- function(x, i, j, directed = TRUE, value) {
checkmate::assert_flag(directed)
if (missing(i) || missing(j)) {
stop("join_keys[i, j] specify both indices to set a key pair.")
} else if (!missing(i) && is.null(i) || !missing(j) && is.null(j)) {
Expand All @@ -163,8 +168,13 @@
)
}

x[[i]][[j]] <- value
x
# Handle join key removal separately
if (is.null(value)) {
x[[i]][[j]] <- NULL
return(x)
}

c(x, join_key(i, j, value, directed))
}

#' @noRd
Expand Down Expand Up @@ -234,9 +244,14 @@
# Remove classes to use list-based get/assign operations
new_x <- unclass(x)

# In case a pair is removed, also remove the symmetric pair
# In case a pair is removed, also remove the symmetric pair and update parents
removed_names <- setdiff(names(new_x[[i]]), names(norm_value))
for (.x in removed_names) new_x[[.x]][[i]] <- NULL
for (.x in removed_names) {
if (identical(parent(x, .x), i)) attr(new_x, "parents")[[.x]] <- NULL
if (identical(parent(x, i), .x)) attr(new_x, "parents")[[i]] <- NULL

new_x[[.x]][[i]] <- NULL
}

new_x[[i]] <- norm_value

Expand Down
6 changes: 5 additions & 1 deletion R/join_keys-parents.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ parents.teal_data <- function(x) {
new_parents <- list()

for (dataset in names(value)) {
# Custom .var.name so it is verbose and helpful for users
checkmate::assert_string(value[[dataset]], .var.name = sprintf("value[[\"%s\"]]", dataset))

parent <- new_parents[[dataset]]
checkmate::assert(
checkmate::check_null(parent),
Expand All @@ -86,7 +89,8 @@ parents.teal_data <- function(x) {
length(value[[dataset]]) == 0
),
checkmate::check_true(parent == value[[dataset]]),
"Please check the difference between provided datasets parents and provided join_keys parents."
"Please check the difference between provided datasets parents and provided join_keys parents.",
.var.name = "value"
)
if (is.null(parent)) {
new_parents[[dataset]] <- value[[dataset]]
Expand Down
1 change: 0 additions & 1 deletion R/join_keys-print.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ format.join_keys <- function(x, ...) {
names <- union(names_sorted, names(x))
x_implicit <- update_keys_given_parents(x)
out <- lapply(names, function(i) {
this_parent <- my_parents[[i]]
out_i <- lapply(union(i, names(x[[i]])), function(j) {
direction <- if (identical(my_parents[[j]], i)) {
" <-- "
Expand Down
57 changes: 20 additions & 37 deletions R/join_keys-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,47 +110,30 @@ update_keys_given_parents <- function(x) {
checkmate::assert_class(jk, "join_keys", .var.name = checkmate::vname(x))

datanames <- names(jk)
duplicate_pairs <- list()
for (d1 in datanames) {
d1_pk <- jk[[d1]][[d1]]
for (d1_ix in seq_along(datanames)) {
d1 <- datanames[[d1_ix]]
d1_parent <- parent(jk, d1)
for (d2 in datanames) {
if (paste(d2, d1) %in% duplicate_pairs) {
next
}
for (d2 in datanames[-1 * seq.int(d1_ix)]) {
if (length(jk[[d1]][[d2]]) == 0) {
d2_parent <- parent(jk, d2)
d2_pk <- jk[[d2]][[d2]]

fk <- if (identical(d1, d2_parent)) {
# first is parent of second -> parent keys -> first keys
d1_pk
} else if (identical(d1_parent, d2)) {
# second is parent of first -> parent keys -> second keys
d2_pk
} else if (identical(d1_parent, d2_parent) && length(d1_parent) > 0) {
# both has the same parent -> common keys to parent
keys_d1_parent <- sort(jk[[d1]][[d1_parent]])
keys_d2_parent <- sort(jk[[d2]][[d2_parent]])

common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)
common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)

if (all(!common_ix_1)) {
# No common keys between datasets - leave empty
next
}

structure(
names(keys_d2_parent)[common_ix_2],
names = names(keys_d1_parent)[common_ix_1]
)
} else {
# cant find connection - leave empty
next
}

if (!identical(d1_parent, d2_parent) || length(d1_parent) == 0) next

# both has the same parent -> common keys to parent
keys_d1_parent <- sort(jk[[d1]][[d1_parent]])
keys_d2_parent <- sort(jk[[d2]][[d2_parent]])

common_ix_1 <- unname(keys_d1_parent) %in% unname(keys_d2_parent)
common_ix_2 <- unname(keys_d2_parent) %in% unname(keys_d1_parent)

# No common keys between datasets - leave empty
if (all(!common_ix_1)) next

fk <- structure(
names(keys_d2_parent)[common_ix_2],
names = names(keys_d1_parent)[common_ix_1]
)
jk[[d1]][[d2]] <- fk # mutate join key
duplicate_pairs <- append(duplicate_pairs, paste(d1, d2))
}
}
}
Expand Down
30 changes: 18 additions & 12 deletions R/join_keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
#' join_keys(...)
#'
#' @description
#' `join_keys()` facilitates the creation and retrieval of relationships between datasets.
#' `join_keys` class extends a list and contains keys connecting pairs of datasets. Each element
#' of the list contains keys for specific dataset. Each dataset can have a relationship with
#' itself (primary key) and with other datasets.
#' Facilitates the creation and retrieval of relationships between datasets.
#' `join_keys` class extends `list` and contains keys connecting pairs of datasets.
#' Each element of the list contains keys for specific dataset.
#' Each dataset can have a relationship with itself (primary key) and with other datasets.
#'
#' Note that `join_keys` list is symmetrical, that is, when keys are set between `dat1` and `dat2` it
#' is automatically mirrored between `dat2` and `dat1`.
#' Note that `join_keys` list is symmetrical and assumes a default direction, that is:
#' when keys are set between `ds1` and `ds2`, it defines `ds1` as the parent
#' in a parent-child relationship and the mapping is automatically mirrored between
#' `ds2` and `ds1`.
#'
#' @section Methods (by class):
#' - `join_keys()`: Returns an empty `join_keys` object when called without arguments.
Expand All @@ -24,6 +26,12 @@
#' either `teal_data` or `join_keys` to extract `join_keys`, \cr
#' or any number of `join_key_set` objects to create `join_keys`, \cr
#' or nothing to create an empty `join_keys`
#' @param value For `x[i, j, directed = TRUE)] <- value` (named/unnamed `character`)
#' Column mapping between datasets.
#'
#' For `join_keys(x) <- value`: (`join_key_set` or list of `join_key_set`) relationship
#' pairs to add to `join_keys` list.
#'
#'
#' @return `join_keys` object.
#'
Expand All @@ -41,8 +49,8 @@
#' join_key("ds1", "ds1", "pk1"),
#' join_key("ds2", "ds2", "pk2"),
#' join_key("ds3", "ds3", "pk3"),
#' join_key("ds2", "ds1", c(pk2 = "pk1")),
#' join_key("ds3", "ds1", c(pk3 = "pk1"))
#' join_key("ds1", "ds2", c(pk1 = "pk2")),
#' join_key("ds1", "ds3", c(pk1 = "pk3"))
#' )
#'
#' jk
Expand Down Expand Up @@ -91,8 +99,6 @@ join_keys.teal_data <- function(...) {
#' @param x (`join_keys`) empty object to set the new relationship pairs.
#' `x` is typically an object of `join_keys` class. When called with the `join_keys(x)`
#' or `join_keys(x) <- value` then it can also take a supported class (`teal_data`, `join_keys`)
#' @param value (`join_key_set` or list of `join_key_set`) relationship pairs to add
#' to `join_keys` list.
#'
#' @export
`join_keys<-` <- function(x, value) {
Expand All @@ -114,8 +120,8 @@ join_keys.teal_data <- function(...) {
#' join_keys(obj)["ds1", "ds1"] <- "pk1"
#' join_keys(obj)["ds2", "ds2"] <- "pk2"
#' join_keys(obj)["ds3", "ds3"] <- "pk3"
#' join_keys(obj)["ds2", "ds1"] <- c(pk2 = "pk1")
#' join_keys(obj)["ds3", "ds1"] <- c(pk3 = "pk1")
#' join_keys(obj)["ds1", "ds2"] <- c(pk1 = "pk2")
#' join_keys(obj)["ds1", "ds3"] <- c(pk1 = "pk3")
#'
#' identical(jk, join_keys(obj))
`join_keys<-.join_keys` <- function(x, value) {
Expand Down
7 changes: 4 additions & 3 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Forkers
Getter
Hoffmann
Reproducibility
formatters
funder
Getter
getter
Hoffmann
pre
repo
Reproducibility
reproducibility
undirected
18 changes: 0 additions & 18 deletions man/dot-merge_parents.Rd

This file was deleted.

Loading