From 310fc729b26b3e4dac09d099ce025003b970eb5e Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 6 Feb 2025 15:58:41 +0000 Subject: [PATCH 1/4] Option to weight averages --- R/Concordance.R | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/R/Concordance.R b/R/Concordance.R index c5a69a0c0..9bc66c6e5 100644 --- a/R/Concordance.R +++ b/R/Concordance.R @@ -12,6 +12,22 @@ #' with any tree that groups the first two leaves together to the exclusion #' of the second. #' +#' By default, the reported value weights each site by the number of quartets +#' it is decisive for. This value can be interpreted as the proportion of +#' all decisive quartets that are concordant with a split. +#' If `weight = FALSE`, the reported value is the mean of the concordance +#' value for each site. +#' Consider a split associated with two sites: +#' one that is concordant with 25% of 96 decisive quartets, and +#' a second that is concordant with 75% of 4 decisive quartets. +#' If `weight = TRUE`, the split concordance will be 24 + 3 / 96 + 4 = 27%. +#' If `weight = FALSE`, the split concordance will be mean(75%, 25%) = 50%. +#' +#' `QuartetConcordance()` is computed exactly, using all quartets, where as +#' other implementations (e.g. IQTREE) follow +#' \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample +#' of quartets for a faster, if potentially less accurate, computation. +#' # `ClusteringConcordance()` and `PhylogeneticConcordance()` respectively report # the proportion of clustering information and phylogenetic information # \insertCite{@as defined in @Vinh2010, @SmithDist}{TreeDist} within a dataset @@ -28,6 +44,8 @@ #' #' @template treeParam #' @template datasetParam +#' @param weight Logical specifying whether to weight sites according to the +#' number of quartets they are decisive for. 
#' #' #' @@ -60,7 +78,7 @@ #' @name SiteConcordance #' @family split support functions #' @export -QuartetConcordance <- function (tree, dataset = NULL) { +QuartetConcordance <- function (tree, dataset = NULL, weight = TRUE) { if (is.null(dataset)) { warning("Cannot calculate concordance without `dataset`.") return(NULL) @@ -75,7 +93,7 @@ QuartetConcordance <- function (tree, dataset = NULL) { cli_progress_bar(name = "Quartet concordance", total = dim(logiSplits)[[2]]) setNames(apply(logiSplits, 2, function (split) { cli_progress_update(1, .envir = parent.frame(2)) - quarts <- rowSums(apply(characters, 2, function (char) { + quarts <- apply(characters, 2, function (char) { tab <- table(split, char) nCol <- dim(tab)[[2]] if (nCol > 1L) { @@ -106,12 +124,21 @@ QuartetConcordance <- function (tree, dataset = NULL) { # Only quartets that include two T and two F can be decisive # Quartets must also include two pairs of characters decisive <- concordant + discordant + + # Return the numerator and denominator of equation 2 in + # Minh et al. 
2020 c(concordant, decisive) } else { c(0L, 0L) } - })) - ifelse(is.nan(quarts[[2]]), NA_real_, quarts[[1]] / quarts[[2]]) + }) + if (isTRUE(weight)) { + quartSums <- rowSums(quarts) + ifelse(is.nan(quartSums[[2]]), NA_real_, quartSums[[1]] / quartSums[[2]]) + } else { + mean(ifelse(is.nan(quarts[2, ]), NA_real_, quarts[1, ] / quarts[2, ]), + na.rm = TRUE) + } }), names(splits)) } From 184c5f749b853dba223fb03a7ff1b641737e4625 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 6 Feb 2025 16:10:51 +0000 Subject: [PATCH 2/4] Improve examples --- R/Concordance.R | 13 +++++++------ man/SiteConcordance.Rd | 34 +++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/R/Concordance.R b/R/Concordance.R index 9bc66c6e5..fa4a9a745 100644 --- a/R/Concordance.R +++ b/R/Concordance.R @@ -62,14 +62,15 @@ #' spc <- SharedPhylogeneticConcordance(tree, dataset) #' mcc <- MutualClusteringConcordance(tree, dataset) #' -#' oPar <- par(mar = rep(0, 4), cex = 0.8) +#' oPar <- par(mar = rep(0, 4), cex = 0.8) # Set plotting parameters #' plot(tree) -#' TreeTools::LabelSplits(tree, signif(qc, 3)) -#' TreeTools::LabelSplits(tree, signif(cc, 3)) -#' TreeTools::LabelSplits(tree, signif(pc, 3)) -#' par(oPar) +#' TreeTools::LabelSplits(tree, signif(qc, 3), cex = 0.8) +#' plot(tree) +#' TreeTools::LabelSplits(tree, signif(cc, 3), cex = 0.8) +#' par(oPar) # Restore plotting parameters #' -#' pairs(cbind(qc, cc, pc, spc, mcc)) +#' # Display correlation between concordance factors +#' pairs(cbind(qc, cc, pc, spc, mcc), asp = 1) #' @template MRS #' @importFrom ape keep.tip #' @importFrom cli cli_progress_bar cli_progress_update diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd index b6d1e39fb..c1a5dbaee 100644 --- a/man/SiteConcordance.Rd +++ b/man/SiteConcordance.Rd @@ -9,7 +9,7 @@ \alias{SharedPhylogeneticConcordance} \title{Calculate site concordance factor} \usage{ -QuartetConcordance(tree, 
dataset = NULL) +QuartetConcordance(tree, dataset = NULL, weight = TRUE) ClusteringConcordance(tree, dataset) @@ -24,6 +24,9 @@ SharedPhylogeneticConcordance(tree, dataset) \item{dataset}{A phylogenetic data matrix of \pkg{phangorn} class \code{phyDat}, whose names correspond to the labels of any accompanying tree.} + +\item{weight}{Logical specifying whether to weight sites according to the +number of quartets they are decisive for.} } \description{ The site concordance factor \insertCite{Minh2020}{TreeSearch} is a measure @@ -39,6 +42,22 @@ But a quartet with characters \verb{0 0 1 1} is decisive, and is concordant with any tree that groups the first two leaves together to the exclusion of the second. +By default, the reported value weights each site by the number of quartets +it is decisive for. This value can be interpreted as the proportion of +all decisive quartets that are concordant with a split. +If \code{weight = FALSE}, the reported value is the mean of the concordance +value for each site. +Consider a split associated with two sites: +one that is concordant with 25\% of 96 decisive quartets, and +a second that is concordant with 75\% of 4 decisive quartets. +If \code{weight = TRUE}, the split concordance will be 24 + 3 / 96 + 4 = 27\%. +If \code{weight = FALSE}, the split concordance will be mean(75\%, 25\%) = 50\%. + +\code{QuartetConcordance()} is computed exactly, using all quartets, where as +other implementations (e.g. IQTREE) follow +\insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample +of quartets for a faster, if potentially less accurate, computation. + \strong{NOTE:} These functions are under development. They are incompletely tested, and may change without notice. Complete documentation and discussion will follow in due course. 
@@ -53,14 +72,15 @@ pc <- PhylogeneticConcordance(tree, dataset) spc <- SharedPhylogeneticConcordance(tree, dataset) mcc <- MutualClusteringConcordance(tree, dataset) -oPar <- par(mar = rep(0, 4), cex = 0.8) +oPar <- par(mar = rep(0, 4), cex = 0.8) # Set plotting parameters +plot(tree) +TreeTools::LabelSplits(tree, signif(qc, 3), cex = 0.8) plot(tree) -TreeTools::LabelSplits(tree, signif(qc, 3)) -TreeTools::LabelSplits(tree, signif(cc, 3)) -TreeTools::LabelSplits(tree, signif(pc, 3)) -par(oPar) +TreeTools::LabelSplits(tree, signif(cc, 3), cex = 0.8) +par(oPar) # Restore plotting parameters -pairs(cbind(qc, cc, pc, spc, mcc)) +# Display correlation between concordance factors +pairs(cbind(qc, cc, pc, spc, mcc), asp = 1) } \references{ \insertAllCited{} From 242f7039d5287756d1248bdef23b50a64992b464 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 6 Feb 2025 16:12:56 +0000 Subject: [PATCH 3/4] rm dead link --- vignettes/getting-started.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index c1fdf5bb0..a84a960d5 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -17,10 +17,10 @@ datasets. ### Simple installation -To use "TreeSearch" you will first need to [install R](https://CRAN.R-project.org/). -[RStudio](https://posit.co/) -([overview](https://dss.princeton.edu/training/RStudio101.pdf)) is a popular -front-end that makes several of R's features easier to use. +To use "TreeSearch" you will first need to [install R]( +https://CRAN.R-project.org/). +[RStudio](https://posit.co/) is a popular front-end that makes several of R's +features easier to use. The "TreeSearch" package can be installed as any other package. 
To get the latest stable version from CRAN, type From c70ab0ed84c41ef31718b79dc28a4ca0dd1354f2 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 6 Feb 2025 16:20:04 +0000 Subject: [PATCH 4/4] IQ- --- R/Concordance.R | 2 +- man/SiteConcordance.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/Concordance.R b/R/Concordance.R index fa4a9a745..f47ed056c 100644 --- a/R/Concordance.R +++ b/R/Concordance.R @@ -24,7 +24,7 @@ #' If `weight = FALSE`, the split concordance will be mean(75%, 25%) = 50%. #' #' `QuartetConcordance()` is computed exactly, using all quartets, where as -#' other implementations (e.g. IQTREE) follow +#' other implementations (e.g. IQ-TREE) follow #' \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample #' of quartets for a faster, if potentially less accurate, computation. #' diff --git a/man/SiteConcordance.Rd b/man/SiteConcordance.Rd index c1a5dbaee..85e38d090 100644 --- a/man/SiteConcordance.Rd +++ b/man/SiteConcordance.Rd @@ -54,7 +54,7 @@ If \code{weight = TRUE}, the split concordance will be 24 + 3 / 96 + 4 = 27\%. If \code{weight = FALSE}, the split concordance will be mean(75\%, 25\%) = 50\%. \code{QuartetConcordance()} is computed exactly, using all quartets, where as -other implementations (e.g. IQTREE) follow +other implementations (e.g. IQ-TREE) follow \insertCite{@Minh2020;textual}{TreeSearch} in using a random subsample of quartets for a faster, if potentially less accurate, computation.