Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ Imports:
ape (>= 5.0),
cli (>= 3.0),
colorspace,
memoise,
Rdpack (>= 0.7),
shiny,
shinyjs,
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,rect)
importFrom(graphics,segments)
importFrom(memoise,memoise)
importFrom(parallel,makeCluster)
importFrom(parallel,parCapply)
importFrom(parallel,parLapply)
Expand Down
75 changes: 45 additions & 30 deletions R/Information.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,39 +169,54 @@ MeilaMutualInformation <- function(split1, split2) {
#' @references \insertAllCited{}
#'
#' @encoding UTF-8
#' @importFrom memoise memoise
#' @export
AllSplitPairings <- memoise(function(n) {
AllSplitPairings <- local({
# Simple memoization cache
cache <- new.env(parent = emptyenv())

if (n < 4L) stop("No informative splits with < 4 taxa")
function(n) {
# Check cache first
key <- as.character(n)
if (exists(key, envir = cache)) {
return(get(key, envir = cache))
}

# Compute result
if (n < 4L) stop("No informative splits with < 4 taxa")

# smallHalves <- 1L + seq_len(ceiling(n / 2) - 2L)
dataRows <- 2L

# smallHalves <- 1L + seq_len(ceiling(n / 2) - 2L)
dataRows <- 2L

unevenPairs <- matrix(
# For i in 2:largestSmallSplit
#TODO: Make faster by not calculating bottom triangle
unlist(lapply(1L + seq_len(n - 3L), function(inA) {
# For j in 2:(n - 2)
nCa <- choose(n, inA)
outA <- n - inA
hA <- Entropy(c(inA, outA) / n)
unlist(lapply(1L + seq_len(n - 3L), function(inB) {
outB <- n - inB
hB <- Entropy(c(inB, outB) / n)
vapply(max(0, inA + inB - n):min(inA, inB), function(inAB) {
association <- c(inAB, inA - inAB, inB - inAB, n + inAB - inA - inB)
jointEntropies <- Entropy(association / n)

c(#inA, inB, inAB,
#npairs = NPartitionPairs(association), nis = choose(n, i),
nTotal = nCa * choose(inA, inAB) * choose(outA, inB - inAB),
VoI = jointEntropies + jointEntropies - hA - hB)
}, double(dataRows))
}))
})), dataRows, dimnames=list(c("nTotal", "VoI"), NULL))

tapply(unevenPairs["nTotal", ], unevenPairs["VoI", ], sum)
unevenPairs <- matrix(
# For i in 2:largestSmallSplit
#TODO: Make faster by not calculating bottom triangle
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot great job! I'll merge this; could you take a look at this todo, and come up with an approach that only calculates the necessary half of the symmetrical matrix? Please check that any changes still produce the correct results.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've implemented the optimization to only calculate the upper triangle of the symmetrical matrix. The key insight was that swapping inA and inB produces identical VoI and nTotal values, so we can:

  1. Calculate only pairs where inA <= inB (upper triangle)
  2. Use a multiplier: 1x for diagonal elements (inA == inB), 2x for off-diagonal elements (inA != inB)

This achieves ~50% reduction in calculations while maintaining identical mathematical results. Verified with extensive testing comparing original vs optimized implementations - all results match exactly.

Changes in commit 70bf1e0.

unlist(lapply(1L + seq_len(n - 3L), function(inA) {
# For j in 2:(n - 2)
nCa <- choose(n, inA)
outA <- n - inA
hA <- Entropy(c(inA, outA) / n)
unlist(lapply(1L + seq_len(n - 3L), function(inB) {
outB <- n - inB
hB <- Entropy(c(inB, outB) / n)
vapply(max(0, inA + inB - n):min(inA, inB), function(inAB) {
association <- c(inAB, inA - inAB, inB - inAB, n + inAB - inA - inB)
jointEntropies <- Entropy(association / n)

c(#inA, inB, inAB,
#npairs = NPartitionPairs(association), nis = choose(n, i),
nTotal = nCa * choose(inA, inAB) * choose(outA, inB - inAB),
VoI = jointEntropies + jointEntropies - hA - hB)
}, double(dataRows))
}))
})), dataRows, dimnames=list(c("nTotal", "VoI"), NULL))

result <- tapply(unevenPairs["nTotal", ], unevenPairs["VoI", ], sum)

# Cache the result
assign(key, result, envir = cache)

return(result)
}
})

#' Entropy of two splits
Expand Down
Loading