diff --git a/DESCRIPTION b/DESCRIPTION
index 94b2fcceb..963ce4ee8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: TreeSearch
Title: Phylogenetic Analysis with Discrete Character Data
-Version: 1.6.1.9004
+Version: 1.6.1.9005
Authors@R: c(
person(
"Martin R.", 'Smith',
diff --git a/NAMESPACE b/NAMESPACE
index a8f2ad088..79e40d690 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -161,6 +161,7 @@ importFrom(TreeTools,LogDoubleFactorial)
importFrom(TreeTools,MakeTreeBinary)
importFrom(TreeTools,MatrixToPhyDat)
importFrom(TreeTools,NRooted)
+importFrom(TreeTools,NSplits)
importFrom(TreeTools,NTip)
importFrom(TreeTools,NUnrooted)
importFrom(TreeTools,NUnrootedMult)
diff --git a/NEWS.md b/NEWS.md
index 97ba6899b..94599506e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,7 @@
-# TreeSearch 1.6.1.9004 (development)
+# TreeSearch 1.6.1.9005 (development)
+
+- `JackLabels()` supports multiple trees per iteration
+ (#197)[https://github.com/ms609/TreeSearch/discussions/197]
- `PresCont()` implements the Group Present / Contradicted measure of
Goloboff et al. (2003).
- Support single-character matrices in `ClusteringConcordance()`
diff --git a/R/Jackknife.R b/R/Jackknife.R
index 59ebf19d2..2ad1a8ee3 100644
--- a/R/Jackknife.R
+++ b/R/Jackknife.R
@@ -3,10 +3,9 @@
#' Resample trees using Jackknife resampling, i.e. removing a subset of
#' characters.
#'
-#' The function assumes
-#' that `InitializeData()` will return a morphy object; if this doesn't hold
-#' for you, post a [GitHub issue](https://github.com/ms609/TreeSearch/issues/new/)
-#' or e-mail the maintainer.
+#' The function assumes that `InitializeData()` will return a morphy object;
+#' if this doesn't hold for you, post a [GitHub issue](
+#' https://github.com/ms609/TreeSearch/issues/new/) or e-mail the maintainer.
#'
#' @inheritParams Ratchet
#' @param resampleFreq Double between 0 and 1 stating proportion of characters
@@ -23,18 +22,18 @@
#' @family split support functions
#' @family custom search functions
#' @export
-Jackknife <- function (tree, dataset, resampleFreq = 2/3,
- InitializeData = PhyDat2Morphy,
- CleanUpData = UnloadMorphy,
- TreeScorer = MorphyLength,
- EdgeSwapper = TBRSwap,
- jackIter = 5000L,
- searchIter = 4000L, searchHits = 42L,
- verbosity = 1L, ...) {
- # initialize tree and data
+Jackknife <- function(tree, dataset, resampleFreq = 2 / 3,
+ InitializeData = PhyDat2Morphy,
+ CleanUpData = UnloadMorphy,
+ TreeScorer = MorphyLength,
+ EdgeSwapper = TBRSwap,
+ jackIter = 5000L, searchIter = 4000L, searchHits = 42L,
+ verbosity = 1L, ...) {
+ # Initialize tree and data
if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) {
stop("tree must be bifurcating; try rooting with ape::root")
}
+
tree <- RenumberTips(tree, names(dataset))
edgeList <- tree[["edge"]]
edgeList <- RenumberEdges(edgeList[, 1], edgeList[, 2])
@@ -46,6 +45,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
eachChar <- seq_along(startWeights)
deindexedChars <- rep.int(eachChar, startWeights)
charsToKeep <- ceiling(resampleFreq * length(deindexedChars))
+
if (charsToKeep < 1L) {
stop("resampleFreq of ", resampleFreq, " is too low; can't keep 0 of ",
length(deindexedChars), " characters.")
@@ -53,6 +53,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
stop("resampleFreq of ", resampleFreq, " is too high; can't keep all ",
length(deindexedChars), " characters.")
}
+
if (verbosity > 10L) { #nocov start
message(" * Beginning search:")
} #nocov end
@@ -89,6 +90,15 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
#' Label nodes with jackknife support values
#'
+#' `JackLabels()` produces a list of node labels denoting split support from
+#' a set of resampled trees, optionally printing them on a tree.
+#'
+#' If an element of `jackTrees` contains multiple trees, then the iteration is
+#' counted as supporting a split if all trees contain the split, and as
+#' contradicting the split if no trees contain it. If a split is only present
+#' in a subset of trees, that iteration is considered not to be decisive, and
+#' is ignored when calculating the support for that split.
+#'
#' @inheritParams TreeTools::Renumber
#' @param jackTrees A list or `multiPhylo` object containing trees generated
#' by [`Resample()`] or [`Jackknife()`].
@@ -98,12 +108,21 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
#' @param plot Logical specifying whether to plot results; if `FALSE`,
#' returns blank labels for nodes near the root that do not correspond to a
#' unique split.
+#' @param showFraction Logical specifying whether to also annotate nodes
+#' with the fraction of replicates that were decisive for the split.
+#' @param format Character specifying return format.
+#' `"character"` returns a character string suitable to add to the `node.labels`
+#' attribute of a tree;
+#' "numeric" returns numeric values suitable for further analysis.
#'
-#' @return A named vector specifying the proportion of jackknife trees
+#' @return A named vector specifying the proportion of jackknife iterations
#' consistent with each node in `tree`, as plotted.
-#' If `plot = FALSE`, blank entries are included corresponding to nodes
-#' that do not require labelling; the return value is in the value required
-#' by `phylo$node.label`.
+#' If `format = "character"`, blank entries are included corresponding to nodes
+#' that do not require labels, such that the return value is in the format
+#' required by `phylo$node.label`.
+#' If multiple trees are specified per iteration, the return value has an
+#' attribute `decisive` listing, for each entry in the return value, how many
+#' iterations were decisive for that split.
#'
#' @examples
#' library("TreeTools", quietly = TRUE) # for as.phylo
@@ -124,7 +143,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3,
#' # write.nexus(tree, file = filename)
#' @template MRS
#' @importFrom ape nodelabels
-#' @importFrom TreeTools SplitFrequency SupportColour
+#' @importFrom TreeTools NSplits SplitFrequency SupportColour
#' @seealso
#' Generate trees by jackknife resampling using [`Resample()`] for standard
#' parsimony searches, or [`Jackknife()`] for custom search criteria.
@@ -134,25 +153,64 @@ JackLabels <- function (tree, jackTrees,
plot = TRUE,
add = FALSE,
adj = 0, col = NULL, frame = "none", pos = 2L,
+ showFraction = FALSE, format = "character",
...) {
- jackSupport <- SplitFrequency(tree, jackTrees) / length(jackTrees)
+ nJack <- length(jackTrees)
+ multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo")
+ if (any(multi)) {
+ jackTrees[!multi] <- lapply(jackTrees[!multi], c)
+ supports <- vapply(jackTrees, function(trees) {
+ freq <- SplitFrequency(tree, trees)
+ ifelse(freq == 0, FALSE, ifelse(freq == length(trees), TRUE, NA))
+ }, logical(NSplits(tree)))
+ numerator <- rowSums(supports, na.rm = TRUE)
+ denominator <- rowSums(!is.na(supports))
+ jackSupport <- structure(numerator / denominator, decisive = denominator)
+ } else {
+ jackSupport <- SplitFrequency(tree, jackTrees) / nJack
+ }
+
+ fracText <- if(isTRUE(showFraction)) {
+ if (!any(multi)) {
+ numerator <- jackSupport * nJack
+ denominator <- nJack
+ }
+ paste0("{", numerator, " / ", denominator, "}")
+ } else {
+ character(0)
+ }
if (plot) {
if (!add) plot(tree)
if (is.null(col)) {
col <- SupportColour(jackSupport)
}
- nodelabels(paste("\n\n", signif(jackSupport, 2)),
+ nodelabels(paste("\n\n", signif(jackSupport, 2),
+ gsub("{", "(", fixed = TRUE,
+ gsub("}", ")", fixed = TRUE, fracText))),
node = as.integer(names(jackSupport)),
adj = adj, col = col, pos = pos, frame = frame, ...)
-
- # Return:
- jackSupport
- } else {
- ret <- character(tree[["Nnode"]])
- ret[as.integer(names(jackSupport)) - NTip(tree)] <- jackSupport
-
- # Return:
- ret
}
+
+ numeric <- c("numeric", "number", "double")
+ character <- c("character", "text")
+ returnMode <- c(rep("numeric", length(numeric)),
+ rep("character", length(character)))[
+ pmatch(tolower(format), c(numeric, character), duplicates.ok = TRUE)]
+
+ # Return:
+ switch(
+ returnMode,
+ "character" = {
+ ret <- character(tree[["Nnode"]])
+ idx <- as.integer(names(jackSupport)) - NTip(tree)
+
+ ret[idx] <- if (isTRUE(showFraction)) {
+ paste(jackSupport, fracText)
+ } else {
+ jackSupport
+ }
+ ret
+ }, jackSupport
+ )
}
diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R
index 582b52ded..4788f5a3f 100644
--- a/R/MaximizeParsimony.R
+++ b/R/MaximizeParsimony.R
@@ -188,14 +188,15 @@
#'
#' # Load data for analysis in R
#' library("TreeTools")
-#' data("congreveLamsdellMatrices", package = "TreeSearch")
-#' dataset <- congreveLamsdellMatrices[[42]]
+#' data("inapplicable.phyData", package = "TreeSearch")
+#' dataset <- inapplicable.phyData[["Asher2005"]]
#'
#' # A very quick run for demonstration purposes
#' trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0,
#' tbrIter = 1, maxHits = 4, maxTime = 1/100,
#' concavity = 10, verbosity = 4)
#' names(trees)
+#' cons <- Consensus(trees)
#'
#' # In actual use, be sure to check that the score has converged on a global
#' # optimum, conducting additional iterations and runs as necessary.
@@ -216,14 +217,31 @@
#' # Now we must decide what to do with the multiple optimal trees from
#' # each replicate.
#'
-#' # Treat each tree equally
-#' JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE))
+#' # Set graphical parameters for plotting
+#' oPar <- par(mar = rep(0, 4), cex = 0.9)
+#'
+#' # Treat each tree as a separate replicate (problematic)
+#' JackLabels(cons, unlist(jackTrees, recursive = FALSE))
#'
#' # Take the strict consensus of all trees for each replicate
-#' JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus))
+#' JackLabels(cons, lapply(jackTrees, ape::consensus))
#'
#' # Take a single tree from each replicate (the first; order's irrelevant)
-#' JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1))
+#' JackLabels(cons, lapply(jackTrees, `[[`, 1))
+#'
+#' # Count support if all most parsimonious trees support a split;
+#' # contradiction if all trees contradict it; don't include replicates where
+#' # not all trees agree on the resolution of a split.
+#' labels <- JackLabels(cons, jackTrees)
+#'
+#' # How many iterations were decisive for each node?
+#' attr(labels, "decisive")
+#'
+#' # Show as proportion
+#' JackLabels(cons, jackTrees, showFrac = TRUE)
+#'
+#' # Restore graphical parameters
+#' par(oPar)
#' }
#'
#' # Tree search with a constraint
@@ -935,17 +953,17 @@ MaximizeParsimony <- function (dataset, tree,
#' @family split support functions
#' @encoding UTF-8
#' @export
-Resample <- function (dataset, tree, method = "jack",
- proportion = 2/3,
- ratchIter = 1L, tbrIter = 8L, finalIter = 3L,
- maxHits = 12L, concavity = Inf,
- tolerance = sqrt(.Machine[["double.eps"]]),
- constraint,
- verbosity = 2L,
- ...) {
+Resample <- function(dataset, tree, method = "jack", proportion = 2 / 3,
+ ratchIter = 1L, tbrIter = 8L, finalIter = 3L,
+ maxHits = 12L, concavity = Inf,
+ tolerance = sqrt(.Machine[["double.eps"]]),
+ constraint, verbosity = 2L,
+ ...) {
+
if (!inherits(dataset, "phyDat")) {
stop("`dataset` must be of class `phyDat`.")
}
+
index <- attr(dataset, "index")
kept <- switch(pmatch(tolower(method), c("jackknife", "bootstrap")),
{
@@ -960,6 +978,7 @@ Resample <- function (dataset, tree, method = "jack",
}, {
sample(index, length(index), replace = TRUE)
})
+
if (is.null(kept)) {
stop("`method` must be either \"jackknife\" or \"bootstrap\".")
}
diff --git a/R/PresentContra.R b/R/PresentContra.R
index 71d6e9866..56a5c98ab 100644
--- a/R/PresentContra.R
+++ b/R/PresentContra.R
@@ -41,9 +41,9 @@
#' @param adj,col,frame,pos,\dots Parameters to pass to `nodelabels()`.
#'
#' @seealso
-#' [`SplitFrequency()`] and [`MostContradictedFreq()`] will compute the number
-#' of trees that contain the split, and the frequency of the most contradicted
-#' split, respectively.
+#' \code{\link[TreeTools]{SplitFrequency}()} and [`MostContradictedFreq()`] will
+#' compute the number of trees that contain the split, and the frequency of the
+#' most contradicted split, respectively.
#' @references \insertAllCited{}
#' @examples
#' library("TreeTools", quietly = TRUE) # for as.phylo
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 0bd6cd091..6ad16dccc 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -185,6 +185,7 @@ cf
cla
codecov
colourblind
+com
dataset's
dd
doi
@@ -195,6 +196,7 @@ entelegyne
equiprobable
ffmpeg
frac
+github
gnathostome
homoplasies
homoplasious
diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd
index 73a75cf38..25f9236f2 100644
--- a/man/JackLabels.Rd
+++ b/man/JackLabels.Rd
@@ -13,6 +13,8 @@ JackLabels(
col = NULL,
frame = "none",
pos = 2L,
+ showFraction = FALSE,
+ format = "character",
...
)
}
@@ -30,16 +32,35 @@ unique split.}
plot.}
\item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.}
+
+\item{showFraction}{Logical specifying whether to also annotate nodes
+with the fraction of replicates that were decisive for the split.}
+
+\item{format}{Character specifying return format.
+\code{"character"} returns a character string suitable to add to the \code{node.labels}
+attribute of a tree;
+"numeric" returns numeric values suitable for further analysis.}
}
\value{
-A named vector specifying the proportion of jackknife trees
+A named vector specifying the proportion of jackknife iterations
consistent with each node in \code{tree}, as plotted.
-If \code{plot = FALSE}, blank entries are included corresponding to nodes
-that do not require labelling; the return value is in the value required
-by \code{phylo$node.label}.
+If \code{format = "character"}, blank entries are included corresponding to nodes
+that do not require labels, such that the return value is in the format
+required by \code{phylo$node.label}.
+If multiple trees are specified per iteration, the return value has an
+attribute \code{decisive} listing, for each entry in the return value, how many
+iterations were decisive for that split.
}
\description{
-Label nodes with jackknife support values
+\code{JackLabels()} produces a list of node labels denoting split support from
+a set of resampled trees, optionally printing them on a tree.
+}
+\details{
+If an element of \code{jackTrees} contains multiple trees, then the iteration is
+counted as supporting a split if all trees contain the split, and as
+contradicting the split if no trees contain it. If a split is only present
+in a subset of trees, that iteration is considered not to be decisive, and
+is ignored when calculating the support for that split.
}
\examples{
library("TreeTools", quietly = TRUE) # for as.phylo
diff --git a/man/Jackknife.Rd b/man/Jackknife.Rd
index 69388c7df..48b7f0237 100644
--- a/man/Jackknife.Rd
+++ b/man/Jackknife.Rd
@@ -69,10 +69,8 @@ Resample trees using Jackknife resampling, i.e. removing a subset of
characters.
}
\details{
-The function assumes
-that \code{InitializeData()} will return a morphy object; if this doesn't hold
-for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue}
-or e-mail the maintainer.
+The function assumes that \code{InitializeData()} will return a morphy object;
+if this doesn't hold for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} or e-mail the maintainer.
}
\seealso{
\itemize{
diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd
index d1d9290d8..32ceb4831 100644
--- a/man/MaximizeParsimony.Rd
+++ b/man/MaximizeParsimony.Rd
@@ -281,14 +281,15 @@ if (interactive()) {
# Load data for analysis in R
library("TreeTools")
-data("congreveLamsdellMatrices", package = "TreeSearch")
-dataset <- congreveLamsdellMatrices[[42]]
+data("inapplicable.phyData", package = "TreeSearch")
+dataset <- inapplicable.phyData[["Asher2005"]]
# A very quick run for demonstration purposes
trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0,
tbrIter = 1, maxHits = 4, maxTime = 1/100,
concavity = 10, verbosity = 4)
names(trees)
+cons <- Consensus(trees)
# In actual use, be sure to check that the score has converged on a global
# optimum, conducting additional iterations and runs as necessary.
@@ -309,14 +310,31 @@ jackTrees <- replicate(nReplicates,
# Now we must decide what to do with the multiple optimal trees from
# each replicate.
-# Treat each tree equally
-JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE))
+# Set graphical parameters for plotting
+oPar <- par(mar = rep(0, 4), cex = 0.9)
+
+# Treat each tree as a separate replicate (problematic)
+JackLabels(cons, unlist(jackTrees, recursive = FALSE))
# Take the strict consensus of all trees for each replicate
-JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus))
+JackLabels(cons, lapply(jackTrees, ape::consensus))
# Take a single tree from each replicate (the first; order's irrelevant)
-JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1))
+JackLabels(cons, lapply(jackTrees, `[[`, 1))
+
+# Count support if all most parsimonious trees support a split;
+# contradiction if all trees contradict it; don't include replicates where
+# not all trees agree on the resolution of a split.
+labels <- JackLabels(cons, jackTrees)
+
+# How many iterations were decisive for each node?
+attr(labels, "decisive")
+
+# Show as proportion
+JackLabels(cons, jackTrees, showFrac = TRUE)
+
+# Restore graphical parameters
+par(oPar)
}
# Tree search with a constraint
diff --git a/man/PresCont.Rd b/man/PresCont.Rd
index a7481ef19..6a2befbb7 100644
--- a/man/PresCont.Rd
+++ b/man/PresCont.Rd
@@ -98,9 +98,9 @@ gpc
\insertAllCited{}
}
\seealso{
-\code{\link[=SplitFrequency]{SplitFrequency()}} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will compute the number
-of trees that contain the split, and the frequency of the most contradicted
-split, respectively.
+\code{\link[TreeTools]{SplitFrequency}()} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will
+compute the number of trees that contain the split, and the frequency of the
+most contradicted split, respectively.
Other split support functions:
\code{\link{JackLabels}()},
diff --git a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
index 381c69744..594a3f43a 100644
--- a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
+++ b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg
@@ -56,18 +56,18 @@
t8
- 0.08
+ 0.08
- 0.13
+ 0.13
- 0.14
+ 0.14
- 1
+ 1
- 1
+ 1
diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R
index a93d50597..9a2622027 100644
--- a/tests/testthat/test-Jackknife.R
+++ b/tests/testthat/test-Jackknife.R
@@ -31,16 +31,43 @@ test_that("Jackknife ouputs good for node.labels", {
jackTrees <- as.phylo(1:100, 8)
tree <- as.phylo(0, 8)
- expect_equal(c("", "", "0.13", "0.08", "0.14", "1", "1"),
- JackLabels(tree, jackTrees, plot = FALSE))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "char"),
+ c("", "", 0.13, 0.08, 0.14, 1, 1))
tree <- RootTree(as.phylo(0, 8), c("t1", "t4"))
- expect_equal(c("", "0.08", "0.13", "", "0.14", "1", "1"),
- JackLabels(tree, jackTrees, plot = FALSE))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "text"),
+ c("", 0.08, 0.13, "", 0.14, 1, 1))
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "num"),
+ setNames(c(0.08, 0.13, 0.14, 1, 1), c(10, 11, 13:15)))
skip_if_not_installed("vdiffr")
vdiffr::expect_doppelganger("plot-jackknife", function() {
- expect_equal(as.double(JackLabels(tree, jackTrees, plot = FALSE)[-c(1, 4)]),
+ expect_equal(JackLabels(tree, jackTrees, plot = FALSE),
unname(JackLabels(tree, jackTrees)))
})
})
+
+test_that("JackLabels() handles multiple trees per iteration", {
+ tree <- BalancedTree(5)
+ dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));")
+ disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));")
+ jackTrees <- list(
+ c(dispute8, dispute8),
+ c(tree, tree),
+ c(dispute8, tree),
+ c(disagree, disagree, disagree),
+ BalancedTree(5)
+ )
+ expect_equal(
+ JackLabels(tree, jackTrees, plot = FALSE, format = "Double"),
+ structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4))
+ )
+
+ lab <- JackLabels(tree, jackTrees, format = "character", showFraction = TRUE,
+ plot = FALSE)
+ tree[["node.label"]] <- lab
+ expect_equal(gsub("_", " ", fixed = TRUE,
+ ape::read.tree(text = ape::write.tree(tree)
+ )[["node.label"]]),
+ lab)
+})