diff --git a/DESCRIPTION b/DESCRIPTION index 94b2fcceb..963ce4ee8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: TreeSearch Title: Phylogenetic Analysis with Discrete Character Data -Version: 1.6.1.9004 +Version: 1.6.1.9005 Authors@R: c( person( "Martin R.", 'Smith', diff --git a/NAMESPACE b/NAMESPACE index a8f2ad088..79e40d690 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -161,6 +161,7 @@ importFrom(TreeTools,LogDoubleFactorial) importFrom(TreeTools,MakeTreeBinary) importFrom(TreeTools,MatrixToPhyDat) importFrom(TreeTools,NRooted) +importFrom(TreeTools,NSplits) importFrom(TreeTools,NTip) importFrom(TreeTools,NUnrooted) importFrom(TreeTools,NUnrootedMult) diff --git a/NEWS.md b/NEWS.md index 97ba6899b..94599506e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ -# TreeSearch 1.6.1.9004 (development) +# TreeSearch 1.6.1.9005 (development) + +- `JackLabels()` supports multiple trees per iteration + (#197)[https://github.com/ms609/TreeSearch/discussions/197] - `PresCont()` implements the Group Present / Contradicted measure of Goloboff et al. (2003). - Support single-character matrices in `ClusteringConcordance()` diff --git a/R/Jackknife.R b/R/Jackknife.R index 59ebf19d2..2ad1a8ee3 100644 --- a/R/Jackknife.R +++ b/R/Jackknife.R @@ -3,10 +3,9 @@ #' Resample trees using Jackknife resampling, i.e. removing a subset of #' characters. #' -#' The function assumes -#' that `InitializeData()` will return a morphy object; if this doesn't hold -#' for you, post a [GitHub issue](https://github.com/ms609/TreeSearch/issues/new/) -#' or e-mail the maintainer. +#' The function assumes that `InitializeData()` will return a morphy object; +#' if this doesn't hold for you, post a [GitHub issue]( +#' https://github.com/ms609/TreeSearch/issues/new/) or e-mail the maintainer. #' #' @inheritParams Ratchet #' @param resampleFreq Double between 0 and 1 stating proportion of characters @@ -23,18 +22,18 @@ #' @family split support functions #' @family custom search functions #' @export -Jackknife <- function (tree, dataset, resampleFreq = 2/3, - InitializeData = PhyDat2Morphy, - CleanUpData = UnloadMorphy, - TreeScorer = MorphyLength, - EdgeSwapper = TBRSwap, - jackIter = 5000L, - searchIter = 4000L, searchHits = 42L, - verbosity = 1L, ...) { - # initialize tree and data +Jackknife <- function(tree, dataset, resampleFreq = 2 / 3, + InitializeData = PhyDat2Morphy, + CleanUpData = UnloadMorphy, + TreeScorer = MorphyLength, + EdgeSwapper = TBRSwap, + jackIter = 5000L, searchIter = 4000L, searchHits = 42L, + verbosity = 1L, ...) { + # Initialize tree and data if (dim(tree[["edge"]])[1] != 2 * tree[["Nnode"]]) { stop("tree must be bifurcating; try rooting with ape::root") } + tree <- RenumberTips(tree, names(dataset)) edgeList <- tree[["edge"]] edgeList <- RenumberEdges(edgeList[, 1], edgeList[, 2]) @@ -46,6 +45,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, eachChar <- seq_along(startWeights) deindexedChars <- rep.int(eachChar, startWeights) charsToKeep <- ceiling(resampleFreq * length(deindexedChars)) + if (charsToKeep < 1L) { stop("resampleFreq of ", resampleFreq, " is too low; can't keep 0 of ", length(deindexedChars), " characters.") @@ -53,6 +53,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, stop("resampleFreq of ", resampleFreq, " is too high; can't keep all ", length(deindexedChars), " characters.") } + if (verbosity > 10L) { #nocov start message(" * Beginning search:") } #nocov end @@ -89,6 +90,15 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, #' Label nodes with jackknife support values #' +#' `JackLabels()` produces a list of node labels denoting split support from +#' a set of resampled trees, optionally printing them on a tree. +#' +#' If an element of `jackTrees` contains multiple trees, then the iteration is +#' counted as supporting a split if all trees contain the split, and as +#' contradicting the split if no trees contain it. If a split is only present +#' in a subset of trees, that iteration is considered not to be decisive, and +#' is ignored when calculating the support for that split. +#' #' @inheritParams TreeTools::Renumber #' @param jackTrees A list or `multiPhylo` object containing trees generated #' by [`Resample()`] or [`Jackknife()`]. @@ -98,12 +108,21 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, #' @param plot Logical specifying whether to plot results; if `FALSE`, #' returns blank labels for nodes near the root that do not correspond to a #' unique split. +#' @param showFraction Logical specifying whether to also annotate nodes +#' with the fraction of replicates that were decisive for the split. +#' @param format Character specifying return format. +#' `"character"` returns a character string suitable to add to the `node.labels` +#' attribute of a tree; +#' "numeric" returns numeric values suitable for further analysis. #' -#' @return A named vector specifying the proportion of jackknife trees +#' @return A named vector specifying the proportion of jackknife iterations #' consistent with each node in `tree`, as plotted. -#' If `plot = FALSE`, blank entries are included corresponding to nodes -#' that do not require labelling; the return value is in the value required -#' by `phylo$node.label`. +#' If `format = "character"`, blank entries are included corresponding to nodes +#' that do not require labels, such that the return value is in the format +#' required by `phylo$node.label`. +#' If multiple trees are specified per iteration, the return value has an +#' attribute `decisive` listing, for each entry in the return value, how many +#' iterations were decisive for that split. #' #' @examples #' library("TreeTools", quietly = TRUE) # for as.phylo @@ -124,7 +143,7 @@ Jackknife <- function (tree, dataset, resampleFreq = 2/3, #' # write.nexus(tree, file = filename) #' @template MRS #' @importFrom ape nodelabels -#' @importFrom TreeTools SplitFrequency SupportColour +#' @importFrom TreeTools NSplits SplitFrequency SupportColour #' @seealso #' Generate trees by jackknife resampling using [`Resample()`] for standard #' parsimony searches, or [`Jackknife()`] for custom search criteria. @@ -134,25 +153,64 @@ JackLabels <- function (tree, jackTrees, plot = TRUE, add = FALSE, adj = 0, col = NULL, frame = "none", pos = 2L, + showFraction = FALSE, format = "character", ...) { - jackSupport <- SplitFrequency(tree, jackTrees) / length(jackTrees) + nJack <- length(jackTrees) + multi <- vapply(jackTrees, inherits, TRUE, "multiPhylo") + if (any(multi)) { + jackTrees[!multi] <- lapply(jackTrees[!multi], c) + supports <- vapply(jackTrees, function(trees) { + freq <- SplitFrequency(tree, trees) + ifelse(freq == 0, FALSE, ifelse(freq == length(trees), TRUE, NA)) + }, logical(NSplits(tree))) + numerator <- rowSums(supports, na.rm = TRUE) + denominator <- rowSums(!is.na(supports)) + jackSupport <- structure(numerator / denominator, decisive = denominator) + } else { + jackSupport <- SplitFrequency(tree, jackTrees) / nJack + } + + fracText <- if(isTRUE(showFraction)) { + if (!any(multi)) { + numerator <- jackSupport * nJack + denominator <- nJack + } + paste0("{", numerator, " / ", denominator, "}") + } else { + character(0) + } if (plot) { if (!add) plot(tree) if (is.null(col)) { col <- SupportColour(jackSupport) } - nodelabels(paste("\n\n", signif(jackSupport, 2)), + nodelabels(paste("\n\n", signif(jackSupport, 2), + gsub("{", "(", fixed = TRUE, + gsub("}", ")", fixed = TRUE, fracText))), node = as.integer(names(jackSupport)), adj = adj, col = col, pos = pos, frame = frame, ...) - - # Return: - jackSupport - } else { - ret <- character(tree[["Nnode"]]) - ret[as.integer(names(jackSupport)) - NTip(tree)] <- jackSupport - - # Return: - ret } + + numeric <- c("numeric", "number", "double") + character <- c("character", "text") + returnMode <- c(rep("numeric", length(numeric)), + rep("character", length(character)))[ + pmatch(tolower(format), c(numeric, character), duplicates.ok = TRUE)] + + # Return: + switch( + returnMode, + "character" = { + ret <- character(tree[["Nnode"]]) + idx <- as.integer(names(jackSupport)) - NTip(tree) + + ret[idx] <- if (isTRUE(showFraction)) { + paste(jackSupport, fracText) + } else { + jackSupport + } + ret + }, jackSupport + ) } diff --git a/R/MaximizeParsimony.R b/R/MaximizeParsimony.R index 582b52ded..4788f5a3f 100644 --- a/R/MaximizeParsimony.R +++ b/R/MaximizeParsimony.R @@ -188,14 +188,15 @@ #' #' # Load data for analysis in R #' library("TreeTools") -#' data("congreveLamsdellMatrices", package = "TreeSearch") -#' dataset <- congreveLamsdellMatrices[[42]] +#' data("inapplicable.phyData", package = "TreeSearch") +#' dataset <- inapplicable.phyData[["Asher2005"]] #' #' # A very quick run for demonstration purposes #' trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0, #' tbrIter = 1, maxHits = 4, maxTime = 1/100, #' concavity = 10, verbosity = 4) #' names(trees) +#' cons <- Consensus(trees) #' #' # In actual use, be sure to check that the score has converged on a global #' # optimum, conducting additional iterations and runs as necessary. @@ -216,14 +217,31 @@ #' # Now we must decide what to do with the multiple optimal trees from #' # each replicate. #' -#' # Treat each tree equally -#' JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE)) +#' # Set graphical parameters for plotting +#' oPar <- par(mar = rep(0, 4), cex = 0.9) +#' +#' # Treat each tree as a separate replicate (problematic) +#' JackLabels(cons, unlist(jackTrees, recursive = FALSE)) #' #' # Take the strict consensus of all trees for each replicate -#' JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus)) +#' JackLabels(cons, lapply(jackTrees, ape::consensus)) #' #' # Take a single tree from each replicate (the first; order's irrelevant) -#' JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1)) +#' JackLabels(cons, lapply(jackTrees, `[[`, 1)) +#' +#' # Count support if all most parsimonious trees support a split; +#' # contradiction if all trees contradict it; don't include replicates where +#' # not all trees agree on the resolution of a split. +#' labels <- JackLabels(cons, jackTrees) +#' +#' # How many iterations were decisive for each node? +#' attr(labels, "decisive") +#' +#' # Show as proportion +#' JackLabels(cons, jackTrees, showFrac = TRUE) +#' +#' # Restore graphical parameters +#' par(oPar) #' } #' #' # Tree search with a constraint @@ -935,17 +953,17 @@ MaximizeParsimony <- function (dataset, tree, #' @family split support functions #' @encoding UTF-8 #' @export -Resample <- function (dataset, tree, method = "jack", - proportion = 2/3, - ratchIter = 1L, tbrIter = 8L, finalIter = 3L, - maxHits = 12L, concavity = Inf, - tolerance = sqrt(.Machine[["double.eps"]]), - constraint, - verbosity = 2L, - ...) { +Resample <- function(dataset, tree, method = "jack", proportion = 2 / 3, + ratchIter = 1L, tbrIter = 8L, finalIter = 3L, + maxHits = 12L, concavity = Inf, + tolerance = sqrt(.Machine[["double.eps"]]), + constraint, verbosity = 2L, + ...) { + if (!inherits(dataset, "phyDat")) { stop("`dataset` must be of class `phyDat`.") } + index <- attr(dataset, "index") kept <- switch(pmatch(tolower(method), c("jackknife", "bootstrap")), { @@ -960,6 +978,7 @@ Resample <- function (dataset, tree, method = "jack", }, { sample(index, length(index), replace = TRUE) }) + if (is.null(kept)) { stop("`method` must be either \"jackknife\" or \"bootstrap\".") } diff --git a/R/PresentContra.R b/R/PresentContra.R index 71d6e9866..56a5c98ab 100644 --- a/R/PresentContra.R +++ b/R/PresentContra.R @@ -41,9 +41,9 @@ #' @param adj,col,frame,pos,\dots Parameters to pass to `nodelabels()`. #' #' @seealso -#' [`SplitFrequency()`] and [`MostContradictedFreq()`] will compute the number -#' of trees that contain the split, and the frequency of the most contradicted -#' split, respectively. +#' \code{\link[TreeTools]{SplitFrequency}()} and [`MostContradictedFreq()`] will +#' compute the number of trees that contain the split, and the frequency of the +#' most contradicted split, respectively. #' @references \insertAllCited{} #' @examples #' library("TreeTools", quietly = TRUE) # for as.phylo diff --git a/inst/WORDLIST b/inst/WORDLIST index 0bd6cd091..6ad16dccc 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -185,6 +185,7 @@ cf cla codecov colourblind +com dataset's dd doi @@ -195,6 +196,7 @@ entelegyne equiprobable ffmpeg frac +github gnathostome homoplasies homoplasious diff --git a/man/JackLabels.Rd b/man/JackLabels.Rd index 73a75cf38..25f9236f2 100644 --- a/man/JackLabels.Rd +++ b/man/JackLabels.Rd @@ -13,6 +13,8 @@ JackLabels( col = NULL, frame = "none", pos = 2L, + showFraction = FALSE, + format = "character", ... ) } @@ -30,16 +32,35 @@ unique split.} plot.} \item{adj, col, frame, pos, \dots}{Parameters to pass to \code{nodelabels()}.} + +\item{showFraction}{Logical specifying whether to also annotate nodes +with the fraction of replicates that were decisive for the split.} + +\item{format}{Character specifying return format. +\code{"character"} returns a character string suitable to add to the \code{node.labels} +attribute of a tree; +"numeric" returns numeric values suitable for further analysis.} } \value{ -A named vector specifying the proportion of jackknife trees +A named vector specifying the proportion of jackknife iterations consistent with each node in \code{tree}, as plotted. -If \code{plot = FALSE}, blank entries are included corresponding to nodes -that do not require labelling; the return value is in the value required -by \code{phylo$node.label}. +If \code{format = "character"}, blank entries are included corresponding to nodes +that do not require labels, such that the return value is in the format +required by \code{phylo$node.label}. +If multiple trees are specified per iteration, the return value has an +attribute \code{decisive} listing, for each entry in the return value, how many +iterations were decisive for that split. } \description{ -Label nodes with jackknife support values +\code{JackLabels()} produces a list of node labels denoting split support from +a set of resampled trees, optionally printing them on a tree. +} +\details{ +If an element of \code{jackTrees} contains multiple trees, then the iteration is +counted as supporting a split if all trees contain the split, and as +contradicting the split if no trees contain it. If a split is only present +in a subset of trees, that iteration is considered not to be decisive, and +is ignored when calculating the support for that split. } \examples{ library("TreeTools", quietly = TRUE) # for as.phylo diff --git a/man/Jackknife.Rd b/man/Jackknife.Rd index 69388c7df..48b7f0237 100644 --- a/man/Jackknife.Rd +++ b/man/Jackknife.Rd @@ -69,10 +69,8 @@ Resample trees using Jackknife resampling, i.e. removing a subset of characters. } \details{ -The function assumes -that \code{InitializeData()} will return a morphy object; if this doesn't hold -for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} -or e-mail the maintainer. +The function assumes that \code{InitializeData()} will return a morphy object; +if this doesn't hold for you, post a \href{https://github.com/ms609/TreeSearch/issues/new/}{GitHub issue} or e-mail the maintainer. } \seealso{ \itemize{ diff --git a/man/MaximizeParsimony.Rd b/man/MaximizeParsimony.Rd index d1d9290d8..32ceb4831 100644 --- a/man/MaximizeParsimony.Rd +++ b/man/MaximizeParsimony.Rd @@ -281,14 +281,15 @@ if (interactive()) { # Load data for analysis in R library("TreeTools") -data("congreveLamsdellMatrices", package = "TreeSearch") -dataset <- congreveLamsdellMatrices[[42]] +data("inapplicable.phyData", package = "TreeSearch") +dataset <- inapplicable.phyData[["Asher2005"]] # A very quick run for demonstration purposes trees <- MaximizeParsimony(dataset, ratchIter = 0, startIter = 0, tbrIter = 1, maxHits = 4, maxTime = 1/100, concavity = 10, verbosity = 4) names(trees) +cons <- Consensus(trees) # In actual use, be sure to check that the score has converged on a global # optimum, conducting additional iterations and runs as necessary. @@ -309,14 +310,31 @@ jackTrees <- replicate(nReplicates, # Now we must decide what to do with the multiple optimal trees from # each replicate. -# Treat each tree equally -JackLabels(ape::consensus(trees), unlist(jackTrees, recursive = FALSE)) +# Set graphical parameters for plotting +oPar <- par(mar = rep(0, 4), cex = 0.9) + +# Treat each tree as a separate replicate (problematic) +JackLabels(cons, unlist(jackTrees, recursive = FALSE)) # Take the strict consensus of all trees for each replicate -JackLabels(ape::consensus(trees), lapply(jackTrees, ape::consensus)) +JackLabels(cons, lapply(jackTrees, ape::consensus)) # Take a single tree from each replicate (the first; order's irrelevant) -JackLabels(ape::consensus(trees), lapply(jackTrees, `[[`, 1)) +JackLabels(cons, lapply(jackTrees, `[[`, 1)) + +# Count support if all most parsimonious trees support a split; +# contradiction if all trees contradict it; don't include replicates where +# not all trees agree on the resolution of a split. +labels <- JackLabels(cons, jackTrees) + +# How many iterations were decisive for each node? +attr(labels, "decisive") + +# Show as proportion +JackLabels(cons, jackTrees, showFrac = TRUE) + +# Restore graphical parameters +par(oPar) } # Tree search with a constraint diff --git a/man/PresCont.Rd b/man/PresCont.Rd index a7481ef19..6a2befbb7 100644 --- a/man/PresCont.Rd +++ b/man/PresCont.Rd @@ -98,9 +98,9 @@ gpc \insertAllCited{} } \seealso{ -\code{\link[=SplitFrequency]{SplitFrequency()}} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will compute the number -of trees that contain the split, and the frequency of the most contradicted -split, respectively. +\code{\link[TreeTools]{SplitFrequency}()} and \code{\link[=MostContradictedFreq]{MostContradictedFreq()}} will +compute the number of trees that contain the split, and the frequency of the +most contradicted split, respectively. Other split support functions: \code{\link{JackLabels}()}, diff --git a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg index 381c69744..594a3f43a 100644 --- a/tests/testthat/_snaps/Jackknife/plot-jackknife.svg +++ b/tests/testthat/_snaps/Jackknife/plot-jackknife.svg @@ -56,18 +56,18 @@ t8 - 0.08 + 0.08 - 0.13 + 0.13 - 0.14 + 0.14 - 1 + 1 - 1 + 1 diff --git a/tests/testthat/test-Jackknife.R b/tests/testthat/test-Jackknife.R index a93d50597..9a2622027 100644 --- a/tests/testthat/test-Jackknife.R +++ b/tests/testthat/test-Jackknife.R @@ -31,16 +31,43 @@ test_that("Jackknife ouputs good for node.labels", { jackTrees <- as.phylo(1:100, 8) tree <- as.phylo(0, 8) - expect_equal(c("", "", "0.13", "0.08", "0.14", "1", "1"), - JackLabels(tree, jackTrees, plot = FALSE)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "char"), + c("", "", 0.13, 0.08, 0.14, 1, 1)) tree <- RootTree(as.phylo(0, 8), c("t1", "t4")) - expect_equal(c("", "0.08", "0.13", "", "0.14", "1", "1"), - JackLabels(tree, jackTrees, plot = FALSE)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "text"), + c("", 0.08, 0.13, "", 0.14, 1, 1)) + expect_equal(JackLabels(tree, jackTrees, plot = FALSE, format = "num"), + setNames(c(0.08, 0.13, 0.14, 1, 1), c(10, 11, 13:15))) skip_if_not_installed("vdiffr") vdiffr::expect_doppelganger("plot-jackknife", function() { - expect_equal(as.double(JackLabels(tree, jackTrees, plot = FALSE)[-c(1, 4)]), + expect_equal(JackLabels(tree, jackTrees, plot = FALSE), unname(JackLabels(tree, jackTrees))) }) }) + +test_that("JackLabels() handles multiple trees per iteration", { + tree <- BalancedTree(5) + dispute8 <- ape::read.tree(text = "(((t1, t3), t2), (t4, t5));") + disagree <- ape::read.tree(text = "(((t5, t2), t3), (t4, t1));") + jackTrees <- list( + c(dispute8, dispute8), + c(tree, tree), + c(dispute8, tree), + c(disagree, disagree, disagree), + BalancedTree(5) + ) + expect_equal( + JackLabels(tree, jackTrees, plot = FALSE, format = "Double"), + structure(c("7" = 4 / 5, "8" = 2 / 4), decisive = c("7" = 5, "8" = 4)) + ) + + lab <- JackLabels(tree, jackTrees, format = "character", showFraction = TRUE, + plot = FALSE) + tree[["node.label"]] <- lab + expect_equal(gsub("_", " ", fixed = TRUE, + ape::read.tree(text = ape::write.tree(tree) + )[["node.label"]]), + lab) +})