From d636cf8cedb50cb997a2149d62010ae5811bb97c Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 09:24:15 +0100 Subject: [PATCH 01/12] Format --- R/Jackknife.R | 27 ++++++++++++++------------- man/Jackknife.Rd | 6 ++---- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index 59ebf19d2..3d18b2c61 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -3,10 +3,9 @@ #' Resample trees using Jackknife resampling, i.e. removing a subset of #' characters. #' -#' The function assumes -#' that `InitializeData()` will return a morphy object; if this doesn't hold -#' for you, post a [GitHub issue](https://github.com/ms609/TreeSearch/issues/new/) -#' or e-mail the maintainer. +#' The function assumes that `InitializeData()` will return a morphy object; +#' if this doesn't hold for you, post a [GitHub issue]( +#' https://github.com/ms609/TreeSearch/issues/new/) or e-mail the maintainer. #' #' @inheritParams Ratchet #' @param resampleFreq Double between 0 and 1 stating proportion of characters @@ -23,18 +22,18 @@ #' @family split support functions #' @family custom search functions #' @export -Jackknife <- function (tree, dataset, resampleFreq = 2/3, - InitializeData = PhyDat2Morphy, - CleanUpData = UnloadMorphy, - TreeScorer = MorphyLength, - EdgeSwapper = TBRSwap, - jackIter = 5000L, - searchIter = 4000L, searchHits = 42L, - verbosity = 1L, ...) { - # initialize tree and data +Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, + InitializeData = PhyDat2Morphy, + CleanUpData = UnloadMorphy, + TreeScorer = MorphyLength, + EdgeSwapper = TBRSwap, + jackIter = 5000L, searchIter = 4000L, searchHits = 42L, + verbosity = 1L, ...) 
{ + # Initialize tree and data if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) { stop("tree must be bifurcating; try rooting with ape::root") } + tree <- RenumberTips(tree, names(dataset)) edgeList <- tree[["edge"]] edgeList <- RenumberEdges(edgeList[, 1], edgeList[, 2]) @@ -46,6 +45,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, eachChar <- seq_along(startWeights) deindexedChars <- rep.int(eachChar, startWeights) charsToKeep <- ceiling(resampleFreq * length(deindexedChars)) + if (charsToKeep < 1L) { stop("resampleFreq of ", resampleFreq, " is too low; can't keep 0 of ", length(deindexedChars), " characters.") @@ -53,6 +53,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, stop("resampleFreq of ", resampleFreq, " is too high; can't keep all ", length(deindexedChars), " characters.") } + if (verbosity > 10L) { #nocov start message(" * Beginning search:") } #nocov end diff --git a/man/Jackknife.Rd b/man/Jackknife.Rd index 69388c7df..48b7f0237 100644 --- a/man/Jackknife.Rd +++ b/man/Jackknife.Rd @@ -69,10 +69,8 @@ Resample trees using Jackknife resampling, i.e. removing a subset of characters. } \details{ -The function assumes -that \code{InitializeData()} will return a morphy object; if this doesn't hold -for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} -or e-mail the maintainer. +The function assumes that \code{InitializeData()} will return a morphy object; +if this doesn't hold for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} or e-mail the maintainer. } \seealso{ \itemize{ From 6a49c63e68a0c0ebf8ddb5c93d1305d25cdb70d1 Mon Sep 17 00:00:00 2001 From: "Martin R. 
Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:00:22 +0100 Subject: [PATCH 02/12] Improve docs & format --- R/Jackknife.R | 2 +- R/MaximizeParsimony.R | 36 ++++++++++++++++++++++-------------- man/JackLabels.Rd | 2 +- man/MaximizeParsimony.Rd | 19 +++++++++++++------ 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index 3d18b2c61..7a084e107 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -103,7 +103,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' @return A named vector specifying the proportion of jackknife trees #' consistent with each node in `tree`, as plotted. #' If `plot = FALSE`, blank entries are included corresponding to nodes -#' that do not require labelling; the return value is in the value required +#' that do not require labelling; the return value is in the format required #' by `phylo$node.label`. #' #' @examples diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R index 582b52ded..fcd9c5188 100644 --- a/R/MaximizeParsimony.R +++ b/R/MaximizeParsimony.R @@ -188,14 +188,15 @@ #' #' # Load data for analysis in R #' library("TreeTools") -#' data("congreveLamsdellMatrices", package = "TreeSearch") -#' dataset <- congreveLamsdellMatrices[[42]] +#' data("inapplicable.phyData", package = "TreeSearch") +#' dataset <- inapplicable.phyData[["Asher2005"]] #' #' # A very quick run for demonstration purposes #' trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0, #' tbrIter = 1, maxHits = 4, maxTime = 1/100, #' concavity = 10, verbosity = 4) #' names(trees) +#' cons <- Consensus(trees) #' #' # In actual use, be sure to check that the score has converged on a global #' # optimum, conducting additional iterations and runs as necessary. @@ -216,14 +217,20 @@ #' # Now we must decide what to do with the multiple optimal trees from #' # each replicate. 
#' -#' # Treat each tree equally -#' JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE)) +#' # Set graphical parameters for plotting +#' oPar <- par(mar = rep(0, 4), cex = 0.9) +#' +#' # Treat each tree as a separate replicate (problematic) +#' JackLabels(cons, unlist(jackTrees, recursive = FALSE)) #' #' # Take the strict consensus of all trees for each replicate -#' JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus)) +#' JackLabels(cons, lapply(jackTrees, ape::consensus)) #' #' # Take a single tree from each replicate (the first; order's irrelevant) -#' JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1)) +#' JackLabels(cons, lapply(jackTrees, `[[`, 1)) +#' +#' # Restore graphical parameters +#' par(oPar) #' } #' #' # Tree search with a constraint @@ -935,17 +942,17 @@ MaximizeParsimony <- function (dataset, tree, #' @family split support functions #' @encoding UTF-8 #' @export -Resample <- function (dataset, tree, method = "jack", - proportion = 2/3, - ratchIter = 1L, tbrIter = 8L, finalIter = 3L, - maxHits = 12L, concavity = Inf, - tolerance = sqrt(.Machine[["double.eps"]]), - constraint, - verbosity = 2L, - ...) { +Resample <- function(dataset, tree, method = "jack", proportion = 2 / 3, + ratchIter = 1L, tbrIter = 8L, finalIter = 3L, + maxHits = 12L, concavity = Inf, + tolerance = sqrt(.Machine[["double.eps"]]), + constraint, verbosity = 2L, + ...) 
{ + if (!inherits(dataset, "phyDat")) { stop("`dataset` must be of class `phyDat`.") } + index <- attr(dataset, "index") kept <- switch(pmatch(tolower(method), c("jackknife", "bootstrap")), { @@ -960,6 +967,7 @@ Resample <- function (dataset, tree, method = "jack", }, { sample(index, length(index), replace = TRUE) }) + if (is.null(kept)) { stop("`method` must be either \"jackknife\" or \"bootstrap\".") } diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd index 73a75cf38..abe12bb39 100644 --- a/man/JackLabels.Rd +++ b/man/JackLabels.Rd @@ -35,7 +35,7 @@ plot.} A named vector specifying the proportion of jackknife trees consistent with each node in \code{tree}, as plotted. If \code{plot = FALSE}, blank entries are included corresponding to nodes -that do not require labelling; the return value is in the value required +that do not require labelling; the return value is in the format required by \code{phylo$node.label}. } \description{ diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd index d1d9290d8..3c9facb28 100644 --- a/man/MaximizeParsimony.Rd +++ b/man/MaximizeParsimony.Rd @@ -281,14 +281,15 @@ if (interactive()) { # Load data for analysis in R library("TreeTools") -data("congreveLamsdellMatrices", package = "TreeSearch") -dataset <- congreveLamsdellMatrices[[42]] +data("inapplicable.phyData", package = "TreeSearch") +dataset <- inapplicable.phyData[["Asher2005"]] # A very quick run for demonstration purposes trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0, tbrIter = 1, maxHits = 4, maxTime = 1/100, concavity = 10, verbosity = 4) names(trees) +cons <- Consensus(trees) # In actual use, be sure to check that the score has converged on a global # optimum, conducting additional iterations and runs as necessary. @@ -309,14 +310,20 @@ jackTrees <- replicate(nReplicates, # Now we must decide what to do with the multiple optimal trees from # each replicate. 
-# Treat each tree equally -JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE)) +# Set graphical parameters for plotting +oPar <- par(mar = rep(0, 4), cex = 0.9) + +# Treat each tree as a separate replicate (problematic) +JackLabels(cons, unlist(jackTrees, recursive = FALSE)) # Take the strict consensus of all trees for each replicate -JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus)) +JackLabels(cons, lapply(jackTrees, ape::consensus)) # Take a single tree from each replicate (the first; order's irrelevant) -JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1)) +JackLabels(cons, lapply(jackTrees, `[[`, 1)) + +# Restore graphical parameters +par(oPar) } # Tree search with a constraint From 18361315c5cbdee046b5a048cdba1059841403a3 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:00:57 +0100 Subject: [PATCH 03/12] Numeric, not character, return --- R/Jackknife.R | 5 +++-- tests/testthat/test-Jackknife.R | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index 7a084e107..52f306de4 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -150,8 +150,9 @@ JackLabels <- function (tree, jackTrees, # Return: jackSupport } else { - ret <- character(tree[["Nnode"]]) - ret[as.integer(names(jackSupport)) - NTip(tree)] <- jackSupport + ret <- `length<-`(double(0), tree[["Nnode"]]) + idx <- as.integer(names(jackSupport)) - NTip(tree) + ret[idx] <- jackSupport # Return: ret diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R index a93d50597..08ad1b32b 100644 --- a/tests/testthat/test-Jackknife.R +++ b/tests/testthat/test-Jackknife.R @@ -31,12 +31,12 @@ test_that("Jackknife ouputs good for node.labels", { jackTrees <- as.phylo(1:100, 8) tree <- as.phylo(0, 8) - expect_equal(c("", "", "0.13", "0.08", "0.14", "1", "1"), - JackLabels(tree, jackTrees, plot = FALSE)) + expect_equal(JackLabels(tree, 
jackTrees, plot = FALSE), + c(NA_real_, NA_real_, 0.13, 0.08, 0.14, 1, 1)) tree <- RootTree(as.phylo(0, 8), c("t1", "t4")) - expect_equal(c("", "0.08", "0.13", "", "0.14", "1", "1"), - JackLabels(tree, jackTrees, plot = FALSE)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE), + c(NA_real_, 0.08, 0.13, NA_real_, 0.14, 1, 1)) skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("plot-jackknife", function() { From 0f0bb0c05881d10bf01e8b45af4deddb81f10e56 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:18:25 +0100 Subject: [PATCH 04/12] Add test --- man/JackLabels.Rd | 21 +++++++++++++++++---- tests/testthat/test-Jackknife.R | 19 +++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd index abe12bb39..67f7abac6 100644 --- a/man/JackLabels.Rd +++ b/man/JackLabels.Rd @@ -13,6 +13,7 @@ JackLabels( col = NULL, frame = "none", pos = 2L, + showFraction = FALSE, ... ) } @@ -30,13 +31,25 @@ unique split.} plot.} \item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.} + +\item{showFraction}{Logical specifying whether to also annotate nodes +with the fraction of replicates that were decisive for the split. + +If an element of \code{jackTrees} contains multiple trees, then the iteration is +counted as supporting a split if all trees contain the split, and as +contradicting the split if no trees contain it. If a split is only present +in a subset of trees, that iteration is considered not to be decisive, and +is ignored when calculating the support for that split.} } \value{ -A named vector specifying the proportion of jackknife trees +A named vector specifying the proportion of jackknife iterations consistent with each node in \code{tree}, as plotted. 
-If \code{plot = FALSE}, blank entries are included corresponding to nodes -that do not require labelling; the return value is in the format required -by \code{phylo$node.label}. +If \code{plot = FALSE}, \code{NA} entries are included corresponding to nodes +that do not require labels, such that the return value is in the format +required by \code{phylo$node.label}. +If multiple trees are specified per iteration, the return value has an +attribute \code{decisive} listing, for each entry in the return value, how many +iterations were decisive for that split. } \description{ Label nodes with jackknife support values diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R index 08ad1b32b..c3ff76949 100644 --- a/tests/testthat/test-Jackknife.R +++ b/tests/testthat/test-Jackknife.R @@ -44,3 +44,22 @@ test_that("Jackknife ouputs good for node.labels", { unname(JackLabels(tree, jackTrees))) }) }) + +test_that("JackLabels() handles multiple trees per iteration", { + tree <- BalancedTree(5) + plot(tree) + nodelabels() + dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));") + disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));") + jackTrees <- list( + c(dispute8, dispute8), + c(tree, tree), + c(dispute8, tree), + c(disagree, disagree, disagree), + BalancedTree(5) + ) + expect_equal( + JackLabels(tree, jackTrees), + structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4)) + ) +}) From ffeaecdb93ff04cfe19b4d6dba79542cccc7e3ab Mon Sep 17 00:00:00 2001 From: "Martin R. 
Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:18:37 +0100 Subject: [PATCH 05/12] Document and implement --- R/Jackknife.R | 52 +++++++++++++++++++++++++++++++++++----- R/MaximizeParsimony.R | 11 +++++++++ man/MaximizeParsimony.Rd | 11 +++++++++ 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index 52f306de4..e558e84ed 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -99,12 +99,23 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' @param plot Logical specifying whether to plot results; if `FALSE`, #' returns blank labels for nodes near the root that do not correspond to a #' unique split. +#' @param showFraction Logical specifying whether to also annotate nodes +#' with the fraction of replicates that were decisive for the split. #' -#' @return A named vector specifying the proportion of jackknife trees +#' If an element of `jackTrees` contains multiple trees, then the iteration is +#' counted as supporting a split if all trees contain the split, and as +#' contradicting the split if no trees contain it. If a split is only present +#' in a subset of trees, that iteration is considered not to be decisive, and +#' is ignored when calculating the support for that split. +#' +#' @return A named vector specifying the proportion of jackknife iterations #' consistent with each node in `tree`, as plotted. -#' If `plot = FALSE`, blank entries are included corresponding to nodes -#' that do not require labelling; the return value is in the format required -#' by `phylo$node.label`. +#' If `plot = FALSE`, `NA` entries are included corresponding to nodes +#' that do not require labels, such that the return value is in the format +#' required by `phylo$node.label`. +#' If multiple trees are specified per iteration, the return value has an +#' attribute `decisive` listing, for each entry in the return value, how many +#' iterations were decisive for that split. 
#' #' @examples #' library("TreeTools", quietly = TRUE) # for as.phylo @@ -135,15 +146,39 @@ JackLabels <- function (tree, jackTrees, plot = TRUE, add = FALSE, adj = 0, col = NULL, frame = "none", pos = 2L, + showFraction = FALSE, ...) { - jackSupport <- SplitFrequency(tree, jackTrees) / length(jackTrees) + nJack <- length(jackTrees) + multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo") + if (any(multi)) { + jackTrees[!multi] <- lapply(jackTrees[!multi], c) + supports <- vapply(jackTrees, function(trees) { + freq <- SplitFrequency(tree, trees) + ifelse(freq == 0, FALSE, ifelse(freq == length(trees), TRUE, NA)) + }, logical(NSplits(tree))) + numerator <- rowSums(supports, na.rm = TRUE) + denominator <- rowSums(!is.na(supports)) + jackSupport <- structure(numerator / denominator, decisive = denominator) + } else { + jackSupport <- SplitFrequency(tree, jackTrees) / nJack + } + if (plot) { if (!add) plot(tree) if (is.null(col)) { col <- SupportColour(jackSupport) } - nodelabels(paste("\n\n", signif(jackSupport, 2)), + fracText <- if(isTRUE(showFraction)) { + if (!any(multi)) { + numerator <- jackSupport * nJack + denominator <- nJack + } + paste0("(", numerator, " / ", denominator, ")") + } else { + character(0) + } + nodelabels(paste("\n\n", signif(jackSupport, 2), fracText), node = as.integer(names(jackSupport)), adj = adj, col = col, pos = pos, frame = frame, ...) 
@@ -153,6 +188,11 @@ JackLabels <- function (tree, jackTrees, ret <- `length<-`(double(0), tree[["Nnode"]]) idx <- as.integer(names(jackSupport)) - NTip(tree) ret[idx] <- jackSupport + if (!is.null(attr(jackSupport, "decisive"))) { + decisive <- `length<-`(integer(0), tree[["Nnode"]]) + decisive[idx] <- attr(jackSupport, "decisive") + attr(ret, "decisive") <- decisive + } # Return: ret diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R index fcd9c5188..4788f5a3f 100644 --- a/R/MaximizeParsimony.R +++ b/R/MaximizeParsimony.R @@ -229,6 +229,17 @@ #' # Take a single tree from each replicate (the first; order's irrelevant) #' JackLabels(cons, lapply(jackTrees, `[[`, 1)) #' +#' # Count support if all most parsimonious trees support a split; +#' # contradiction if all trees contradict it; don't include replicates where +#' # not all trees agree on the resolution of a split. +#' labels <- JackLabels(cons, jackTrees) +#' +#' # How many iterations were decisive for each node? +#' attr(labels, "decisive") +#' +#' # Show as proportion +#' JackLabels(cons, jackTrees, showFrac = TRUE) +#' #' # Restore graphical parameters #' par(oPar) #' } diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd index 3c9facb28..32ceb4831 100644 --- a/man/MaximizeParsimony.Rd +++ b/man/MaximizeParsimony.Rd @@ -322,6 +322,17 @@ JackLabels(cons, lapply(jackTrees, ape::consensus)) # Take a single tree from each replicate (the first; order's irrelevant) JackLabels(cons, lapply(jackTrees, `[[`, 1)) +# Count support if all most parsimonious trees support a split; +# contradiction if all trees contradict it; don't include replicates where +# not all trees agree on the resolution of a split. +labels <- JackLabels(cons, jackTrees) + +# How many iterations were decisive for each node? 
+attr(labels, "decisive") + +# Show as proportion +JackLabels(cons, jackTrees, showFrac = TRUE) + # Restore graphical parameters par(oPar) } From 5ef5e28e6d17b4f1e89367e38863a0c8e8482a87 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 10:22:49 +0100 Subject: [PATCH 06/12] v1.6.1.9005 --- DESCRIPTION | 2 +- NEWS.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 94b2fcceb..963ce4ee8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeSearch Title: Phylogenetic Analysis with Discrete Character Data -Version: 1.6.1.9004 +Version: 1.6.1.9005 Authors@R: c( person( "Martin R.", 'Smith', diff --git a/NEWS.md b/NEWS.md index 97ba6899b..94599506e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ -# TreeSearch 1.6.1.9004 (development) +# TreeSearch 1.6.1.9005 (development) + +- `JackLabels()` supports multiple trees per iteration + [#197](https://github.com/ms609/TreeSearch/discussions/197) - `PresCont()` implements the Group Present / Contradicted measure of Goloboff et al. (2003). - Support single-character matrices in `ClusteringConcordance()` From 1477880ba48c7dbbfe6328bc5622fde435ff4450 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:24:59 +0100 Subject: [PATCH 07/12] Output formats --- R/Jackknife.R | 69 ++++++++++++++++++++------------- tests/testthat/test-Jackknife.R | 20 +++++++--- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index e558e84ed..5ce9e04ea 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -27,7 +27,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, CleanUpData = UnloadMorphy, TreeScorer = MorphyLength, EdgeSwapper = TBRSwap, - jackIter = 5000L, searchIter = 4000L, searchHits = 42L, + jackIter = 5000L, searchIter = 4000L, searchHits = 42L, verbosity = 1L, ...) 
{ # Initialize tree and data if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) { @@ -101,6 +101,10 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' unique split. #' @param showFraction Logical specifying whether to also annotate nodes #' with the fraction of replicates that were decisive for the split. +#' @param format Character specifying return format. +#' `"character"` returns a character string suitable to add to the `node.label` +#' attribute of a tree; +#' "numeric" returns numeric values suitable for further analysis. #' #' If an element of `jackTrees` contains multiple trees, then the iteration is #' counted as supporting a split if all trees contain the split, and as @@ -110,7 +114,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' #' @return A named vector specifying the proportion of jackknife iterations #' consistent with each node in `tree`, as plotted. -#' If `plot = FALSE`, `NA` entries are included corresponding to nodes +#' If `format = "character"`, blank entries are included corresponding to nodes #' that do not require labels, such that the return value is in the format #' required by `phylo$node.label`. #' If multiple trees are specified per iteration, the return value has an @@ -146,7 +150,7 @@ JackLabels <- function (tree, jackTrees, plot = TRUE, add = FALSE, adj = 0, col = NULL, frame = "none", pos = 2L, - showFraction = FALSE, + showFraction = FALSE, format = "character", ...) 
{ nJack <- length(jackTrees) multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo") @@ -163,38 +167,47 @@ JackLabels <- function (tree, jackTrees, jackSupport <- SplitFrequency(tree, jackTrees) / nJack } + fracText <- if(isTRUE(showFraction)) { + if (!any(multi)) { + numerator <- jackSupport * nJack + denominator <- nJack + } + paste0("{", numerator, " / ", denominator, "}") + } else { + character(0) + } if (plot) { if (!add) plot(tree) if (is.null(col)) { col <- SupportColour(jackSupport) } - fracText <- if(isTRUE(showFraction)) { - if (!any(multi)) { - numerator <- jackSupport * nJack - denominator <- nJack - } - paste0("(", numerator, " / ", denominator, ")") - } else { - character(0) - } - nodelabels(paste("\n\n", signif(jackSupport, 2), fracText), + nodelabels(paste("\n\n", signif(jackSupport, 2), + gsub("{", "(", fixed = TRUE, + gsub("}", ")", fixed = TRUE, fracText))), node = as.integer(names(jackSupport)), adj = adj, col = col, pos = pos, frame = frame, ...) - - # Return: - jackSupport - } else { - ret <- `length<-`(double(0), tree[["Nnode"]]) - idx <- as.integer(names(jackSupport)) - NTip(tree) - ret[idx] <- jackSupport - if (!is.null(attr(jackSupport, "decisive"))) { - decisive <- `length<-`(integer(0), tree[["Nnode"]]) - decisive[idx] <- attr(jackSupport, "decisive") - attr(ret, "decisive") <- decisive - } - - # Return: - ret } + + numeric <- c("numeric", "number", "double") + character <- c("character", "text") + returnMode <- c(rep("numeric", length(numeric)), + rep("character", length(character)))[ + pmatch(tolower(format), c(numeric, character), duplicates.ok = TRUE)] + + # Return: + switch( + returnMode, + "character" = { + ret <- character(tree[["Nnode"]]) + idx <- as.integer(names(jackSupport)) - NTip(tree) + + ret[idx] <- if (isTRUE(showFraction)) { + paste(jackSupport, fracText) + } else { + jackSupport + } + ret + }, jackSupport + ) } diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R index c3ff76949..c6d9191ba 
100644 --- a/tests/testthat/test-Jackknife.R +++ b/tests/testthat/test-Jackknife.R @@ -31,12 +31,14 @@ test_that("Jackknife ouputs good for node.labels", { jackTrees <- as.phylo(1:100, 8) tree <- as.phylo(0, 8) - expect_equal(JackLabels(tree, jackTrees, plot = FALSE), - c(NA_real_, NA_real_, 0.13, 0.08, 0.14, 1, 1)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "char"), + c("", "", 0.13, 0.08, 0.14, 1, 1)) tree <- RootTree(as.phylo(0, 8), c("t1", "t4")) - expect_equal(JackLabels(tree, jackTrees, plot = FALSE), - c(NA_real_, 0.08, 0.13, NA_real_, 0.14, 1, 1)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "text"), + c("", 0.08, 0.13, "", 0.14, 1, 1)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "num"), + setNames(c(0.08, 0.13, 0.14, 1, 1), c(10, 11, 13:15))) skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("plot-jackknife", function() { @@ -59,7 +61,15 @@ test_that("JackLabels() handles multiple trees per iteration", { BalancedTree(5) ) expect_equal( - JackLabels(tree, jackTrees), + JackLabels(tree, jackTrees, plot = FALSE), structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4)) ) + + lab <- JackLabels(tree, jackTrees, format = "character", showFraction = TRUE, + plot = FALSE) + tree[["node.label"]] <- lab + expect_equal(gsub("_", " ", fixed = TRUE, + ape::read.tree(text = ape::write.tree(tree) + )[["node.label"]]), + lab) }) From 7d9441c5e1ef12a851fcaac2602dbda97eee77c3 Mon Sep 17 00:00:00 2001 From: "Martin R. 
Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:30:21 +0100 Subject: [PATCH 08/12] Update tests --- R/Jackknife.R | 15 +++++++++------ .../testthat/_snaps/Jackknife/plot-jackknife.svg | 10 +++++----- tests/testthat/test-Jackknife.R | 6 ++---- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/R/Jackknife.R b/R/Jackknife.R index 5ce9e04ea..b9f08f516 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -90,6 +90,15 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' Label nodes with jackknife support values #' +#' `JackLabels()` produces a list of node labels denoting split support from +#' a set of resampled trees, optionally printing them on a tree. +#' +#' If an element of `jackTrees` contains multiple trees, then the iteration is +#' counted as supporting a split if all trees contain the split, and as +#' contradicting the split if no trees contain it. If a split is only present +#' in a subset of trees, that iteration is considered not to be decisive, and +#' is ignored when calculating the support for that split. +#' #' @inheritParams TreeTools::Renumber #' @param jackTrees A list or `multiPhylo` object containing trees generated #' by [`Resample()`] or [`Jackknife()`]. @@ -106,12 +115,6 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' attribute of a tree; #' "numeric" returns numeric values suitable for further analysis. #' -#' If an element of `jackTrees` contains multiple trees, then the iteration is -#' counted as supporting a split if all trees contain the split, and as -#' contradicting the split if no trees contain it. If a split is only present -#' in a subset of trees, that iteration is considered not to be decisive, and -#' is ignored when calculating the support for that split. -#' #' @return A named vector specifying the proportion of jackknife iterations #' consistent with each node in `tree`, as plotted. 
#' If `format = "character"`, blank entries are included corresponding to nodes diff --git a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg index 381c69744..594a3f43a 100644 --- a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg +++ b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg @@ -56,18 +56,18 @@ t8 - 0.08 + 0.08 - 0.13 + 0.13 - 0.14 + 0.14 - 1 + 1 - 1 + 1 diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R index c6d9191ba..9a2622027 100644 --- a/tests/testthat/test-Jackknife.R +++ b/tests/testthat/test-Jackknife.R @@ -42,15 +42,13 @@ test_that("Jackknife ouputs good for node.labels", { skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("plot-jackknife", function() { - expect_equal(as.double(JackLabels(tree, jackTrees, plot = FALSE)[-c(1, 4)]), + expect_equal(JackLabels(tree, jackTrees, plot = FALSE), unname(JackLabels(tree, jackTrees))) }) }) test_that("JackLabels() handles multiple trees per iteration", { tree <- BalancedTree(5) - plot(tree) - nodelabels() dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));") disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));") jackTrees <- list( @@ -61,7 +59,7 @@ test_that("JackLabels() handles multiple trees per iteration", { BalancedTree(5) ) expect_equal( - JackLabels(tree, jackTrees, plot = FALSE), + JackLabels(tree, jackTrees, plot = FALSE, format = "Double"), structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4)) ) From 6fe1f9163b7530060f3dfebc43fb0f075bc1d229 Mon Sep 17 00:00:00 2001 From: "Martin R. 
Smith" <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 12:31:48 +0100 Subject: [PATCH 09/12] Update JackLabels.Rd --- man/JackLabels.Rd | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd index 67f7abac6..25f9236f2 100644 --- a/man/JackLabels.Rd +++ b/man/JackLabels.Rd @@ -14,6 +14,7 @@ JackLabels( frame = "none", pos = 2L, showFraction = FALSE, + format = "character", ... ) } @@ -33,18 +34,17 @@ plot.} \item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.} \item{showFraction}{Logical specifying whether to also annotate nodes -with the fraction of replicates that were decisive for the split. +with the fraction of replicates that were decisive for the split.} -If an element of \code{jackTrees} contains multiple trees, then the iteration is -counted as supporting a split if all trees contain the split, and as -contradicting the split if no trees contain it. If a split is only present -in a subset of trees, that iteration is considered not to be decisive, and -is ignored when calculating the support for that split.} +\item{format}{Character specifying return format. +\code{"character"} returns a character string suitable to add to the \code{node.label} +attribute of a tree; +"numeric" returns numeric values suitable for further analysis.} } \value{ A named vector specifying the proportion of jackknife iterations consistent with each node in \code{tree}, as plotted. -If \code{plot = FALSE}, \code{NA} entries are included corresponding to nodes +If \code{format = "character"}, blank entries are included corresponding to nodes that do not require labels, such that the return value is in the format required by \code{phylo$node.label}. If multiple trees are specified per iteration, the return value has an @@ -52,7 +52,15 @@ attribute \code{decisive} listing, for each entry in the return value, how many iterations were decisive for that split. 
} \description{ -Label nodes with jackknife support values +\code{JackLabels()} produces a list of node labels denoting split support from +a set of resampled trees, optionally printing them on a tree. +} +\details{ +If an element of \code{jackTrees} contains multiple trees, then the iteration is +counted as supporting a split if all trees contain the split, and as +contradicting the split if no trees contain it. If a split is only present +in a subset of trees, that iteration is considered not to be decisive, and +is ignored when calculating the support for that split. } \examples{ library("TreeTools", quietly = TRUE) # for as.phylo From d61216792022abb940c15a48de75c7bf6140b512 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 14:45:12 +0100 Subject: [PATCH 10/12] github.com NEWS url --- inst/WORDLIST | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index 0bd6cd091..6ad16dccc 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -185,6 +185,7 @@ cf cla codecov colourblind +com dataset's dd doi @@ -195,6 +196,7 @@ entelegyne equiprobable ffmpeg frac +github gnathostome homoplasies homoplasious From eebe76ff57e458f8446bf1db4e29faf83bfc179b Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 14:46:30 +0100 Subject: [PATCH 11/12] Import NSplits --- NAMESPACE | 1 + R/Jackknife.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index a8f2ad088..79e40d690 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -161,6 +161,7 @@ importFrom(TreeTools,LogDoubleFactorial) importFrom(TreeTools,MakeTreeBinary) importFrom(TreeTools,MatrixToPhyDat) importFrom(TreeTools,NRooted) +importFrom(TreeTools,NSplits) importFrom(TreeTools,NTip) importFrom(TreeTools,NUnrooted) importFrom(TreeTools,NUnrootedMult) diff --git a/R/Jackknife.R b/R/Jackknife.R index b9f08f516..2ad1a8ee3 100644 --- a/R/Jackknife.R +++ 
b/R/Jackknife.R @@ -143,7 +143,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, #' # write.nexus(tree, file = filename) #' @template MRS #' @importFrom ape nodelabels -#' @importFrom TreeTools SplitFrequency SupportColour +#' @importFrom TreeTools NSplits SplitFrequency SupportColour #' @seealso #' Generate trees by jackknife resampling using [`Resample()`] for standard #' parsimony searches, or [`Jackknife()`] for custom search criteria. From d03eb7d20eeabdfebb31b074c5fbc2278caa55cb Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 5 Aug 2025 14:47:15 +0100 Subject: [PATCH 12/12] Link SplitFrequency --- R/PresentContra.R | 6 +++--- man/PresCont.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/PresentContra.R b/R/PresentContra.R index 71d6e9866..56a5c98ab 100644 --- a/R/PresentContra.R +++ b/R/PresentContra.R @@ -41,9 +41,9 @@ #' @param adj,col,frame,pos,\dots Parameters to pass to `nodelabels()`. #' #' @seealso -#' [`SplitFrequency()`] and [`MostContradictedFreq()`] will compute the number -#' of trees that contain the split, and the frequency of the most contradicted -#' split, respectively. +#' \code{\link[TreeTools]{SplitFrequency}()} and [`MostContradictedFreq()`] will +#' compute the number of trees that contain the split, and the frequency of the +#' most contradicted split, respectively. #' @references \insertAllCited{} #' @examples #' library("TreeTools", quietly = TRUE) # for as.phylo diff --git a/man/PresCont.Rd b/man/PresCont.Rd index a7481ef19..6a2befbb7 100644 --- a/man/PresCont.Rd +++ b/man/PresCont.Rd @@ -98,9 +98,9 @@ gpc \insertAllCited{} } \seealso{ -\code{\link[=SplitFrequency]{SplitFrequency()}} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will compute the number -of trees that contain the split, and the frequency of the most contradicted -split, respectively. 
+\code{\link[TreeTools]{SplitFrequency}()} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will +compute the number of trees that contain the split, and the frequency of the +most contradicted split, respectively. Other split support functions: \code{\link{JackLabels}()},