From d636cf8cedb50cb997a2149d62010ae5811bb97c Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 09:24:15 +0100
Subject: [PATCH 01/12] Format
---
R/Jackknife.R | 27 ++++++++++++++-------------
man/Jackknife.Rd | 6 ++----
2 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 59ebf19d2..3d18b2c61 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -3,10 +3,9 @@
#' Resample trees using Jackknife resampling, i.e. removing a subset of
#' characters.
#'
-#' The function assumes
-#' that `InitializeData()` will return a morphy object; if this doesn't hold
-#' for you, post a [GitHub issue](https://github.com/ms609/TreeSearch/issues/new/)
-#' or e-mail the maintainer.
+#' The function assumes that `InitializeData()` will return a morphy object;
+#' if this doesn't hold for you, post a [GitHub issue](
+#' https://github.com/ms609/TreeSearch/issues/new/) or e-mail the maintainer.
#'
#' @inheritParams Ratchet
#' @param resampleFreq Double between 0 and 1 stating proportion of characters
@@ -23,18 +22,18 @@
#' @family split support functions
#' @family custom search functions
#' @export
-Jackknife <- function (tree, dataset, resampleFreq = 2/3,
- InitializeData = PhyDat2Morphy,
- CleanUpData = UnloadMorphy,
- TreeScorer = MorphyLength,
- EdgeSwapper = TBRSwap,
- jackIter = 5000L,
- searchIter = 4000L, searchHits = 42L,
- verbosity = 1L, ...) {
- # initialize tree and data
+Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
+ InitializeData = PhyDat2Morphy,
+ CleanUpData = UnloadMorphy,
+ TreeScorer = MorphyLength,
+ EdgeSwapper = TBRSwap,
+ jackIter = 5000L, searchIter = 4000L, searchHits = 42L,
+ verbosity = 1L, ...) {
+ # Initialize tree and data
if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) {
stop("tree must be bifurcating; try rooting with ape::root")
}
+
tree <- RenumberTips(tree, names(dataset))
edgeList <- tree[["edge"]]
edgeList <- RenumberEdges(edgeList[, 1], edgeList[, 2])
@@ -46,6 +45,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
eachChar <- seq_along(startWeights)
deindexedChars <- rep.int(eachChar, startWeights)
charsToKeep <- ceiling(resampleFreq * length(deindexedChars))
+
if (charsToKeep < 1L) {
stop("resampleFreq of ", resampleFreq, " is too low; can't keep 0 of ",
length(deindexedChars), " characters.")
@@ -53,6 +53,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
stop("resampleFreq of ", resampleFreq, " is too high; can't keep all ",
length(deindexedChars), " characters.")
}
+
if (verbosity > 10L) { #nocov start
message(" * Beginning search:")
} #nocov end
diff --git a/man/Jackknife.Rd b/man/Jackknife.Rd
index 69388c7df..48b7f0237 100644
--- a/man/Jackknife.Rd
+++ b/man/Jackknife.Rd
@@ -69,10 +69,8 @@ Resample trees using Jackknife resampling, i.e. removing a subset of
characters.
}
\details{
-The function assumes
-that \code{InitializeData()} will return a morphy object; if this doesn't hold
-for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue}
-or e-mail the maintainer.
+The function assumes that \code{InitializeData()} will return a morphy object;
+if this doesn't hold for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} or e-mail the maintainer.
}
\seealso{
\itemize{
From 6a49c63e68a0c0ebf8ddb5c93d1305d25cdb70d1 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:00:22 +0100
Subject: [PATCH 02/12] Improve docs & format
---
R/Jackknife.R | 2 +-
R/MaximizeParsimony.R | 36 ++++++++++++++++++++++--------------
man/JackLabels.Rd | 2 +-
man/MaximizeParsimony.Rd | 19 +++++++++++++------
4 files changed, 37 insertions(+), 22 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 3d18b2c61..7a084e107 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -103,7 +103,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' @return A named vector specifying the proportion of jackknife trees
#' consistent with each node in `tree`, as plotted.
#' If `plot = FALSE`, blank entries are included corresponding to nodes
-#' that do not require labelling; the return value is in the value required
+#' that do not require labelling; the return value is in the format required
#' by `phylo$node.label`.
#'
#' @examples
diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R
index 582b52ded..fcd9c5188 100644
--- a/R/MaximizeParsimony.R
+++ b/R/MaximizeParsimony.R
@@ -188,14 +188,15 @@
#'
#' # Load data for analysis in R
#' library("TreeTools")
-#' data("congreveLamsdellMatrices", package = "TreeSearch")
-#' dataset <- congreveLamsdellMatrices[[42]]
+#' data("inapplicable.phyData", package = "TreeSearch")
+#' dataset <- inapplicable.phyData[["Asher2005"]]
#'
#' # A very quick run for demonstration purposes
#' trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0,
#' tbrIter = 1, maxHits = 4, maxTime = 1/100,
#' concavity = 10, verbosity = 4)
#' names(trees)
+#' cons <- Consensus(trees)
#'
#' # In actual use, be sure to check that the score has converged on a global
#' # optimum, conducting additional iterations and runs as necessary.
@@ -216,14 +217,20 @@
#' # Now we must decide what to do with the multiple optimal trees from
#' # each replicate.
#'
-#' # Treat each tree equally
-#' JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE))
+#' # Set graphical parameters for plotting
+#' oPar <- par(mar = rep(0, 4), cex = 0.9)
+#'
+#' # Treat each tree as a separate replicate (problematic)
+#' JackLabels(cons, unlist(jackTrees, recursive = FALSE))
#'
#' # Take the strict consensus of all trees for each replicate
-#' JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus))
+#' JackLabels(cons, lapply(jackTrees, ape::consensus))
#'
#' # Take a single tree from each replicate (the first; order's irrelevant)
-#' JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1))
+#' JackLabels(cons, lapply(jackTrees, `[[`, 1))
+#'
+#' # Restore graphical parameters
+#' par(oPar)
#' }
#'
#' # Tree search with a constraint
@@ -935,17 +942,17 @@ MaximizeParsimony <- function (dataset, tree,
#' @family split support functions
#' @encoding UTF-8
#' @export
-Resample <- function (dataset, tree, method = "jack",
- proportion = 2/3,
- ratchIter = 1L, tbrIter = 8L, finalIter = 3L,
- maxHits = 12L, concavity = Inf,
- tolerance = sqrt(.Machine[["double.eps"]]),
- constraint,
- verbosity = 2L,
- ...) {
+Resample <- function(dataset, tree, method = "jack", proportion = 2 / 3,
+ ratchIter = 1L, tbrIter = 8L, finalIter = 3L,
+ maxHits = 12L, concavity = Inf,
+ tolerance = sqrt(.Machine[["double.eps"]]),
+ constraint, verbosity = 2L,
+ ...) {
+
if (!inherits(dataset, "phyDat")) {
stop("`dataset` must be of class `phyDat`.")
}
+
index <- attr(dataset, "index")
kept <- switch(pmatch(tolower(method), c("jackknife", "bootstrap")),
{
@@ -960,6 +967,7 @@ Resample <- function (dataset, tree, method = "jack",
}, {
sample(index, length(index), replace = TRUE)
})
+
if (is.null(kept)) {
stop("`method` must be either \"jackknife\" or \"bootstrap\".")
}
diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd
index 73a75cf38..abe12bb39 100644
--- a/man/JackLabels.Rd
+++ b/man/JackLabels.Rd
@@ -35,7 +35,7 @@ plot.}
A named vector specifying the proportion of jackknife trees
consistent with each node in \code{tree}, as plotted.
If \code{plot = FALSE}, blank entries are included corresponding to nodes
-that do not require labelling; the return value is in the value required
+that do not require labelling; the return value is in the format required
by \code{phylo$node.label}.
}
\description{
diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd
index d1d9290d8..3c9facb28 100644
--- a/man/MaximizeParsimony.Rd
+++ b/man/MaximizeParsimony.Rd
@@ -281,14 +281,15 @@ if (interactive()) {
# Load data for analysis in R
library("TreeTools")
-data("congreveLamsdellMatrices", package = "TreeSearch")
-dataset <- congreveLamsdellMatrices[[42]]
+data("inapplicable.phyData", package = "TreeSearch")
+dataset <- inapplicable.phyData[["Asher2005"]]
# A very quick run for demonstration purposes
trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0,
tbrIter = 1, maxHits = 4, maxTime = 1/100,
concavity = 10, verbosity = 4)
names(trees)
+cons <- Consensus(trees)
# In actual use, be sure to check that the score has converged on a global
# optimum, conducting additional iterations and runs as necessary.
@@ -309,14 +310,20 @@ jackTrees <- replicate(nReplicates,
# Now we must decide what to do with the multiple optimal trees from
# each replicate.
-# Treat each tree equally
-JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE))
+# Set graphical parameters for plotting
+oPar <- par(mar = rep(0, 4), cex = 0.9)
+
+# Treat each tree as a separate replicate (problematic)
+JackLabels(cons, unlist(jackTrees, recursive = FALSE))
# Take the strict consensus of all trees for each replicate
-JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus))
+JackLabels(cons, lapply(jackTrees, ape::consensus))
# Take a single tree from each replicate (the first; order's irrelevant)
-JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1))
+JackLabels(cons, lapply(jackTrees, `[[`, 1))
+
+# Restore graphical parameters
+par(oPar)
}
# Tree search with a constraint
From 18361315c5cbdee046b5a048cdba1059841403a3 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:00:57 +0100
Subject: [PATCH 03/12] Numeric, not character, return
---
R/Jackknife.R | 5 +++--
tests/testthat/test-Jackknife.R | 8 ++++----
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 7a084e107..52f306de4 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -150,8 +150,9 @@ JackLabels <- function (tree, jackTrees,
# Return:
jackSupport
} else {
- ret <- character(tree[["Nnode"]])
- ret[as.integer(names(jackSupport)) - NTip(tree)] <- jackSupport
+ ret <- `length<-`(double(0), tree[["Nnode"]])
+ idx <- as.integer(names(jackSupport)) - NTip(tree)
+ ret[idx] <- jackSupport
# Return:
ret
diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R
index a93d50597..08ad1b32b 100644
--- a/tests/testthat/test-Jackknife.R
+++ b/tests/testthat/test-Jackknife.R
@@ -31,12 +31,12 @@ test_that("Jackknife ouputs good for node.labels", {
jackTrees <- as.phylo(1:100, 8)
tree <- as.phylo(0, 8)
- expect_equal(c("", "", "0.13", "0.08", "0.14", "1", "1"),
- JackLabels(tree, jackTrees, plot = FALSE))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
+ c(NA_real_, NA_real_, 0.13, 0.08, 0.14, 1, 1))
tree <- RootTree(as.phylo(0, 8), c("t1", "t4"))
- expect_equal(c("", "0.08", "0.13", "", "0.14", "1", "1"),
- JackLabels(tree, jackTrees, plot = FALSE))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
+ c(NA_real_, 0.08, 0.13, NA_real_, 0.14, 1, 1))
skip_if_not_installed("vdiffr")
vdiffr::expect_doppelganger("plot-jackknife", function() {
From 0f0bb0c05881d10bf01e8b45af4deddb81f10e56 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:18:25 +0100
Subject: [PATCH 04/12] Add test
---
man/JackLabels.Rd | 21 +++++++++++++++++----
tests/testthat/test-Jackknife.R | 19 +++++++++++++++++++
2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd
index abe12bb39..67f7abac6 100644
--- a/man/JackLabels.Rd
+++ b/man/JackLabels.Rd
@@ -13,6 +13,7 @@ JackLabels(
col = NULL,
frame = "none",
pos = 2L,
+ showFraction = FALSE,
...
)
}
@@ -30,13 +31,25 @@ unique split.}
plot.}
\item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.}
+
+\item{showFraction}{Logical specifying whether to also annotate nodes
+with the fraction of replicates that were decisive for the split.
+
+If an element of \code{jackTrees} contains multiple trees, then the iteration is
+counted as supporting a split if all trees contain the split, and as
+contradicting the split if no trees contain it. If a split is only present
+in a subset of trees, that iteration is considered not to be decisive, and
+is ignored when calculating the support for that split.}
}
\value{
-A named vector specifying the proportion of jackknife trees
+A named vector specifying the proportion of jackknife iterations
consistent with each node in \code{tree}, as plotted.
-If \code{plot = FALSE}, blank entries are included corresponding to nodes
-that do not require labelling; the return value is in the format required
-by \code{phylo$node.label}.
+If \code{plot = FALSE}, \code{NA} entries are included corresponding to nodes
+that do not require labels, such that the return value is in the format
+required by \code{phylo$node.label}.
+If multiple trees are specified per iteration, the return value has an
+attribute \code{decisive} listing, for each entry in the return value, how many
+iterations were decisive for that split.
}
\description{
Label nodes with jackknife support values
diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R
index 08ad1b32b..c3ff76949 100644
--- a/tests/testthat/test-Jackknife.R
+++ b/tests/testthat/test-Jackknife.R
@@ -44,3 +44,22 @@ test_that("Jackknife ouputs good for node.labels", {
unname(JackLabels(tree, jackTrees)))
})
})
+
+test_that("JackLabels() handles multiple trees per iteration", {
+ tree <- BalancedTree(5)
+ plot(tree)
+ nodelabels()
+ dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));")
+ disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));")
+ jackTrees <- list(
+ c(dispute8, dispute8),
+ c(tree, tree),
+ c(dispute8, tree),
+ c(disagree, disagree, disagree),
+ BalancedTree(5)
+ )
+ expect_equal(
+ JackLabels(tree, jackTrees),
+ structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4))
+ )
+})
From ffeaecdb93ff04cfe19b4d6dba79542cccc7e3ab Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:18:37 +0100
Subject: [PATCH 05/12] Document and implement
---
R/Jackknife.R | 52 +++++++++++++++++++++++++++++++++++-----
R/MaximizeParsimony.R | 11 +++++++++
man/MaximizeParsimony.Rd | 11 +++++++++
3 files changed, 68 insertions(+), 6 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 52f306de4..e558e84ed 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -99,12 +99,23 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' @param plot Logical specifying whether to plot results; if `FALSE`,
#' returns blank labels for nodes near the root that do not correspond to a
#' unique split.
+#' @param showFraction Logical specifying whether to also annotate nodes
+#' with the fraction of replicates that were decisive for the split.
#'
-#' @return A named vector specifying the proportion of jackknife trees
+#' If an element of `jackTrees` contains multiple trees, then the iteration is
+#' counted as supporting a split if all trees contain the split, and as
+#' contradicting the split if no trees contain it. If a split is only present
+#' in a subset of trees, that iteration is considered not to be decisive, and
+#' is ignored when calculating the support for that split.
+#'
+#' @return A named vector specifying the proportion of jackknife iterations
#' consistent with each node in `tree`, as plotted.
-#' If `plot = FALSE`, blank entries are included corresponding to nodes
-#' that do not require labelling; the return value is in the format required
-#' by `phylo$node.label`.
+#' If `plot = FALSE`, `NA` entries are included corresponding to nodes
+#' that do not require labels, such that the return value is in the format
+#' required by `phylo$node.label`.
+#' If multiple trees are specified per iteration, the return value has an
+#' attribute `decisive` listing, for each entry in the return value, how many
+#' iterations were decisive for that split.
#'
#' @examples
#' library("TreeTools", quietly = TRUE) # for as.phylo
@@ -135,15 +146,39 @@ JackLabels <- function (tree, jackTrees,
plot = TRUE,
add = FALSE,
adj = 0, col = NULL, frame = "none", pos = 2L,
+ showFraction = FALSE,
...) {
- jackSupport <- SplitFrequency(tree, jackTrees) / length(jackTrees)
+ nJack <- length(jackTrees)
+ multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo")
+ if (any(multi)) {
+ jackTrees[!multi] <- lapply(jackTrees[!multi], c)
+ supports <- vapply(jackTrees, function(trees) {
+ freq <- SplitFrequency(tree, trees)
+ ifelse(freq == 0, FALSE, ifelse(freq == length(trees), TRUE, NA))
+ }, logical(NSplits(tree)))
+ numerator <- rowSums(supports, na.rm = TRUE)
+ denominator <- rowSums(!is.na(supports))
+ jackSupport <- structure(numerator / denominator, decisive = denominator)
+ } else {
+ jackSupport <- SplitFrequency(tree, jackTrees) / nJack
+ }
+
if (plot) {
if (!add) plot(tree)
if (is.null(col)) {
col <- SupportColour(jackSupport)
}
- nodelabels(paste("\n\n", signif(jackSupport, 2)),
+ fracText <- if(isTRUE(showFraction)) {
+ if (!any(multi)) {
+ numerator <- jackSupport * nJack
+ denominator <- nJack
+ }
+ paste0("(", numerator, " / ", denominator, ")")
+ } else {
+ character(0)
+ }
+ nodelabels(paste("\n\n", signif(jackSupport, 2), fracText),
node = as.integer(names(jackSupport)),
adj = adj, col = col, pos = pos, frame = frame, ...)
@@ -153,6 +188,11 @@ JackLabels <- function (tree, jackTrees,
ret <- `length<-`(double(0), tree[["Nnode"]])
idx <- as.integer(names(jackSupport)) - NTip(tree)
ret[idx] <- jackSupport
+ if (!is.null(attr(jackSupport, "decisive"))) {
+ decisive <- `length<-`(integer(0), tree[["Nnode"]])
+ decisive[idx] <- attr(jackSupport, "decisive")
+ attr(ret, "decisive") <- decisive
+ }
# Return:
ret
diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R
index fcd9c5188..4788f5a3f 100644
--- a/R/MaximizeParsimony.R
+++ b/R/MaximizeParsimony.R
@@ -229,6 +229,17 @@
#' # Take a single tree from each replicate (the first; order's irrelevant)
#' JackLabels(cons, lapply(jackTrees, `[[`, 1))
#'
+#' # Count support if all most parsimonious trees support a split;
+#' # contradiction if all trees contradict it; don't include replicates where
+#' # not all trees agree on the resolution of a split.
+#' labels <- JackLabels(cons, jackTrees)
+#'
+#' # How many iterations were decisive for each node?
+#' attr(labels, "decisive")
+#'
+#' # Show as proportion
+#' JackLabels(cons, jackTrees, showFraction = TRUE)
+#'
#' # Restore graphical parameters
#' par(oPar)
#' }
diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd
index 3c9facb28..32ceb4831 100644
--- a/man/MaximizeParsimony.Rd
+++ b/man/MaximizeParsimony.Rd
@@ -322,6 +322,17 @@ JackLabels(cons, lapply(jackTrees, ape::consensus))
# Take a single tree from each replicate (the first; order's irrelevant)
JackLabels(cons, lapply(jackTrees, `[[`, 1))
+# Count support if all most parsimonious trees support a split;
+# contradiction if all trees contradict it; don't include replicates where
+# not all trees agree on the resolution of a split.
+labels <- JackLabels(cons, jackTrees)
+
+# How many iterations were decisive for each node?
+attr(labels, "decisive")
+
+# Show as proportion
+JackLabels(cons, jackTrees, showFraction = TRUE)
+
# Restore graphical parameters
par(oPar)
}
From 5ef5e28e6d17b4f1e89367e38863a0c8e8482a87 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 10:22:49 +0100
Subject: [PATCH 06/12] v1.6.1.9005
---
DESCRIPTION | 2 +-
NEWS.md | 5 ++++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index 94b2fcceb..963ce4ee8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: TreeSearch
Title: Phylogenetic Analysis with Discrete Character Data
-Version: 1.6.1.9004
+Version: 1.6.1.9005
Authors@R: c(
person(
"Martin R.", 'Smith',
diff --git a/NEWS.md b/NEWS.md
index 97ba6899b..94599506e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,7 @@
-# TreeSearch 1.6.1.9004 (development)
+# TreeSearch 1.6.1.9005 (development)
+
+- `JackLabels()` supports multiple trees per iteration
+ [#197](https://github.com/ms609/TreeSearch/discussions/197)
- `PresCont()` implements the Group Present / Contradicted measure of
Goloboff et al. (2003).
- Support single-character matrices in `ClusteringConcordance()`
From 1477880ba48c7dbbfe6328bc5622fde435ff4450 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:24:59 +0100
Subject: [PATCH 07/12] Output formats
---
R/Jackknife.R | 69 ++++++++++++++++++++-------------
tests/testthat/test-Jackknife.R | 20 +++++++---
2 files changed, 56 insertions(+), 33 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index e558e84ed..5ce9e04ea 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -27,7 +27,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
CleanUpData = UnloadMorphy,
TreeScorer = MorphyLength,
EdgeSwapper = TBRSwap,
- jackIter = 5000L, searchIter = 4000L, searchHits = 42L,
+ jackIter = 5000L, searchIter = 4000L, searchHits = 42L,
verbosity = 1L, ...) {
# Initialize tree and data
if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) {
@@ -101,6 +101,10 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' unique split.
#' @param showFraction Logical specifying whether to also annotate nodes
#' with the fraction of replicates that were decisive for the split.
+#' @param format Character specifying return format.
+#' `"character"` returns a character string suitable to add to the `node.label`
+#' attribute of a tree;
+#' "numeric" returns numeric values suitable for further analysis.
#'
#' If an element of `jackTrees` contains multiple trees, then the iteration is
#' counted as supporting a split if all trees contain the split, and as
@@ -110,7 +114,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#'
#' @return A named vector specifying the proportion of jackknife iterations
#' consistent with each node in `tree`, as plotted.
-#' If `plot = FALSE`, `NA` entries are included corresponding to nodes
+#' If `format = "character"`, blank entries are included corresponding to nodes
#' that do not require labels, such that the return value is in the format
#' required by `phylo$node.label`.
#' If multiple trees are specified per iteration, the return value has an
@@ -146,7 +150,7 @@ JackLabels <- function (tree, jackTrees,
plot = TRUE,
add = FALSE,
adj = 0, col = NULL, frame = "none", pos = 2L,
- showFraction = FALSE,
+ showFraction = FALSE, format = "character",
...) {
nJack <- length(jackTrees)
multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo")
@@ -163,38 +167,47 @@ JackLabels <- function (tree, jackTrees,
jackSupport <- SplitFrequency(tree, jackTrees) / nJack
}
+ fracText <- if(isTRUE(showFraction)) {
+ if (!any(multi)) {
+ numerator <- jackSupport * nJack
+ denominator <- nJack
+ }
+ paste0("{", numerator, " / ", denominator, "}")
+ } else {
+ character(0)
+ }
if (plot) {
if (!add) plot(tree)
if (is.null(col)) {
col <- SupportColour(jackSupport)
}
- fracText <- if(isTRUE(showFraction)) {
- if (!any(multi)) {
- numerator <- jackSupport * nJack
- denominator <- nJack
- }
- paste0("(", numerator, " / ", denominator, ")")
- } else {
- character(0)
- }
- nodelabels(paste("\n\n", signif(jackSupport, 2), fracText),
+ nodelabels(paste("\n\n", signif(jackSupport, 2),
+ gsub("{", "(", fixed = TRUE,
+ gsub("}", ")", fixed = TRUE, fracText))),
node = as.integer(names(jackSupport)),
adj = adj, col = col, pos = pos, frame = frame, ...)
-
- # Return:
- jackSupport
- } else {
- ret <- `length<-`(double(0), tree[["Nnode"]])
- idx <- as.integer(names(jackSupport)) - NTip(tree)
- ret[idx] <- jackSupport
- if (!is.null(attr(jackSupport, "decisive"))) {
- decisive <- `length<-`(integer(0), tree[["Nnode"]])
- decisive[idx] <- attr(jackSupport, "decisive")
- attr(ret, "decisive") <- decisive
- }
-
- # Return:
- ret
}
+
+ numeric <- c("numeric", "number", "double")
+ character <- c("character", "text")
+ returnMode <- c(rep("numeric", length(numeric)),
+ rep("character", length(character)))[
+ pmatch(tolower(format), c(numeric, character), duplicates.ok = TRUE)]
+
+ # Return:
+ switch(
+ returnMode,
+ "character" = {
+ ret <- character(tree[["Nnode"]])
+ idx <- as.integer(names(jackSupport)) - NTip(tree)
+
+ ret[idx] <- if (isTRUE(showFraction)) {
+ paste(jackSupport, fracText)
+ } else {
+ jackSupport
+ }
+ ret
+ }, jackSupport
+ )
}
diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R
index c3ff76949..c6d9191ba 100644
--- a/tests/testthat/test-Jackknife.R
+++ b/tests/testthat/test-Jackknife.R
@@ -31,12 +31,14 @@ test_that("Jackknife ouputs good for node.labels", {
jackTrees <- as.phylo(1:100, 8)
tree <- as.phylo(0, 8)
- expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
- c(NA_real_, NA_real_, 0.13, 0.08, 0.14, 1, 1))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "char"),
+ c("", "", 0.13, 0.08, 0.14, 1, 1))
tree <- RootTree(as.phylo(0, 8), c("t1", "t4"))
- expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
- c(NA_real_, 0.08, 0.13, NA_real_, 0.14, 1, 1))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "text"),
+ c("", 0.08, 0.13, "", 0.14, 1, 1))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "num"),
+ setNames(c(0.08, 0.13, 0.14, 1, 1), c(10, 11, 13:15)))
skip_if_not_installed("vdiffr")
vdiffr::expect_doppelganger("plot-jackknife", function() {
@@ -59,7 +61,15 @@ test_that("JackLabels() handles multiple trees per iteration", {
BalancedTree(5)
)
expect_equal(
- JackLabels(tree, jackTrees),
+ JackLabels(tree, jackTrees, plot = FALSE),
structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4))
)
+
+ lab <- JackLabels(tree, jackTrees, format = "character", showFraction = TRUE,
+ plot = FALSE)
+ tree[["node.label"]] <- lab
+ expect_equal(gsub("_", " ", fixed = TRUE,
+ ape::read.tree(text = ape::write.tree(tree)
+ )[["node.label"]]),
+ lab)
})
From 7d9441c5e1ef12a851fcaac2602dbda97eee77c3 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:30:21 +0100
Subject: [PATCH 08/12] Update tests
---
R/Jackknife.R | 15 +++++++++------
.../testthat/_snaps/Jackknife/plot-jackknife.svg | 10 +++++-----
tests/testthat/test-Jackknife.R | 6 ++----
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 5ce9e04ea..b9f08f516 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -90,6 +90,15 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' Label nodes with jackknife support values
#'
+#' `JackLabels()` produces a list of node labels denoting split support from
+#' a set of resampled trees, optionally printing them on a tree.
+#'
+#' If an element of `jackTrees` contains multiple trees, then the iteration is
+#' counted as supporting a split if all trees contain the split, and as
+#' contradicting the split if no trees contain it. If a split is only present
+#' in a subset of trees, that iteration is considered not to be decisive, and
+#' is ignored when calculating the support for that split.
+#'
#' @inheritParams TreeTools::Renumber
#' @param jackTrees A list or `multiPhylo` object containing trees generated
#' by [`Resample()`] or [`Jackknife()`].
@@ -106,12 +115,6 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' attribute of a tree;
#' "numeric" returns numeric values suitable for further analysis.
#'
-#' If an element of `jackTrees` contains multiple trees, then the iteration is
-#' counted as supporting a split if all trees contain the split, and as
-#' contradicting the split if no trees contain it. If a split is only present
-#' in a subset of trees, that iteration is considered not to be decisive, and
-#' is ignored when calculating the support for that split.
-#'
#' @return A named vector specifying the proportion of jackknife iterations
#' consistent with each node in `tree`, as plotted.
#' If `format = "character"`, blank entries are included corresponding to nodes
diff --git a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
index 381c69744..594a3f43a 100644
--- a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
+++ b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
@@ -56,18 +56,18 @@
t8
- 0.08
+ 0.08
- 0.13
+ 0.13
- 0.14
+ 0.14
- 1
+ 1
- 1
+ 1
diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R
index c6d9191ba..9a2622027 100644
--- a/tests/testthat/test-Jackknife.R
+++ b/tests/testthat/test-Jackknife.R
@@ -42,15 +42,13 @@ test_that("Jackknife ouputs good for node.labels", {
skip_if_not_installed("vdiffr")
vdiffr::expect_doppelganger("plot-jackknife", function() {
- expect_equal(as.double(JackLabels(tree, jackTrees, plot = FALSE)[-c(1, 4)]),
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
unname(JackLabels(tree, jackTrees)))
})
})
test_that("JackLabels() handles multiple trees per iteration", {
tree <- BalancedTree(5)
- plot(tree)
- nodelabels()
dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));")
disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));")
jackTrees <- list(
@@ -61,7 +59,7 @@ test_that("JackLabels() handles multiple trees per iteration", {
BalancedTree(5)
)
expect_equal(
- JackLabels(tree, jackTrees, plot = FALSE),
+ JackLabels(tree, jackTrees, plot = FALSE, format = "Double"),
structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4))
)
From 6fe1f9163b7530060f3dfebc43fb0f075bc1d229 Mon Sep 17 00:00:00 2001
From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 12:31:48 +0100
Subject: [PATCH 09/12] Update JackLabels.Rd
---
man/JackLabels.Rd | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd
index 67f7abac6..25f9236f2 100644
--- a/man/JackLabels.Rd
+++ b/man/JackLabels.Rd
@@ -14,6 +14,7 @@ JackLabels(
frame = "none",
pos = 2L,
showFraction = FALSE,
+ format = "character",
...
)
}
@@ -33,18 +34,17 @@ plot.}
\item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.}
\item{showFraction}{Logical specifying whether to also annotate nodes
-with the fraction of replicates that were decisive for the split.
+with the fraction of replicates that were decisive for the split.}
-If an element of \code{jackTrees} contains multiple trees, then the iteration is
-counted as supporting a split if all trees contain the split, and as
-contradicting the split if no trees contain it. If a split is only present
-in a subset of trees, that iteration is considered not to be decisive, and
-is ignored when calculating the support for that split.}
+\item{format}{Character specifying return format.
+\code{"character"} returns a character string suitable to add to the \code{node.label}
+attribute of a tree;
+"numeric" returns numeric values suitable for further analysis.}
}
\value{
A named vector specifying the proportion of jackknife iterations
consistent with each node in \code{tree}, as plotted.
-If \code{plot = FALSE}, \code{NA} entries are included corresponding to nodes
+If \code{format = "character"}, blank entries are included corresponding to nodes
that do not require labels, such that the return value is in the format
required by \code{phylo$node.label}.
If multiple trees are specified per iteration, the return value has an
@@ -52,7 +52,15 @@ attribute \code{decisive} listing, for each entry in the return value, how many
iterations were decisive for that split.
}
\description{
-Label nodes with jackknife support values
+\code{JackLabels()} produces a list of node labels denoting split support from
+a set of resampled trees, optionally printing them on a tree.
+}
+\details{
+If an element of \code{jackTrees} contains multiple trees, then the iteration is
+counted as supporting a split if all trees contain the split, and as
+contradicting the split if no trees contain it. If a split is only present
+in a subset of trees, that iteration is considered not to be decisive, and
+is ignored when calculating the support for that split.
}
\examples{
library("TreeTools", quietly = TRUE) # for as.phylo
From d61216792022abb940c15a48de75c7bf6140b512 Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 14:45:12 +0100
Subject: [PATCH 10/12] github.com
NEWS url
---
inst/WORDLIST | 2 ++
1 file changed, 2 insertions(+)
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 0bd6cd091..6ad16dccc 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -185,6 +185,7 @@ cf
cla
codecov
colourblind
+com
dataset's
dd
doi
@@ -195,6 +196,7 @@ entelegyne
equiprobable
ffmpeg
frac
+github
gnathostome
homoplasies
homoplasious
From eebe76ff57e458f8446bf1db4e29faf83bfc179b Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 14:46:30 +0100
Subject: [PATCH 11/12] Import NSplits
---
NAMESPACE | 1 +
R/Jackknife.R | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/NAMESPACE b/NAMESPACE
index a8f2ad088..79e40d690 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -161,6 +161,7 @@ importFrom(TreeTools,LogDoubleFactorial)
importFrom(TreeTools,MakeTreeBinary)
importFrom(TreeTools,MatrixToPhyDat)
importFrom(TreeTools,NRooted)
+importFrom(TreeTools,NSplits)
importFrom(TreeTools,NTip)
importFrom(TreeTools,NUnrooted)
importFrom(TreeTools,NUnrootedMult)
diff --git a/R/Jackknife.R b/R/Jackknife.R
index b9f08f516..2ad1a8ee3 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -143,7 +143,7 @@ Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
#' # write.nexus(tree, file = filename)
#' @template MRS
#' @importFrom ape nodelabels
-#' @importFrom TreeTools SplitFrequency SupportColour
+#' @importFrom TreeTools NSplits SplitFrequency SupportColour
#' @seealso
#' Generate trees by jackknife resampling using [`Resample()`] for standard
#' parsimony searches, or [`Jackknife()`] for custom search criteria.
From d03eb7d20eeabdfebb31b074c5fbc2278caa55cb Mon Sep 17 00:00:00 2001
From: RevBayes analysis <1695515+ms609@users.noreply.github.com>
Date: Tue, 5 Aug 2025 14:47:15 +0100
Subject: [PATCH 12/12] Link SplitFrequency
---
R/PresentContra.R | 6 +++---
man/PresCont.Rd | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/R/PresentContra.R b/R/PresentContra.R
index 71d6e9866..56a5c98ab 100644
--- a/R/PresentContra.R
+++ b/R/PresentContra.R
@@ -41,9 +41,9 @@
#' @param adj,col,frame,pos,\dots Parameters to pass to `nodelabels()`.
#'
#' @seealso
-#' [`SplitFrequency()`] and [`MostContradictedFreq()`] will compute the number
-#' of trees that contain the split, and the frequency of the most contradicted
-#' split, respectively.
+#' \code{\link[TreeTools]{SplitFrequency}()} and [`MostContradictedFreq()`] will
+#' compute the number of trees that contain the split, and the frequency of the
+#' most contradicted split, respectively.
#' @references \insertAllCited{}
#' @examples
#' library("TreeTools", quietly = TRUE) # for as.phylo
diff --git a/man/PresCont.Rd b/man/PresCont.Rd
index a7481ef19..6a2befbb7 100644
--- a/man/PresCont.Rd
+++ b/man/PresCont.Rd
@@ -98,9 +98,9 @@ gpc
\insertAllCited{}
}
\seealso{
-\code{\link[=SplitFrequency]{SplitFrequency()}} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will compute the number
-of trees that contain the split, and the frequency of the most contradicted
-split, respectively.
+\code{\link[TreeTools]{SplitFrequency}()} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will
+compute the number of trees that contain the split, and the frequency of the
+most contradicted split, respectively.
Other split support functions:
\code{\link{JackLabels}()},