From 6f7d218d479b13648132fc069808e1febf773a50 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 09:23:42 +0000 Subject: [PATCH 1/8] Use int32 to support larger trees --- DESCRIPTION | 4 +- NEWS.md | 7 ++- src/day_1985.cpp | 81 ++++++++++++++++-------------- src/information.h | 26 +++++----- src/tree_distances.cpp | 19 ++++--- tests/testthat/test-day_1985.cpp.r | 8 ++- 6 files changed, 76 insertions(+), 69 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7fb958f8a..beccf1a3e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: TreeDist Type: Package Title: Calculate and Map Distances Between Phylogenetic Trees -Version: 2.11.1.9000 +Version: 2.11.1.9001 Authors@R: c(person("Martin R.", "Smith", email = "martin.smith@durham.ac.uk", role = c("aut", "cre", "cph", "prg"), @@ -50,7 +50,7 @@ Imports: Rdpack (>= 0.7), shiny, shinyjs, - TreeTools (>= 1.16), + TreeTools (>= 2.0.0.9002), Suggests: bookdown, cluster, diff --git a/NEWS.md b/NEWS.md index 257165b59..f41c29b43 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# TreeDist 2.11.1.9001 (2026-02-05) + +- Use int32 to support larger trees, per TreeTools v2.1.0. + + # TreeDist 2.11.1.9000 (2025-11-13) - `AHMI()` now returns negative values (previously zeroed in error). @@ -39,7 +44,7 @@ This was **fixed in v2.11.0**. - Faster tree distance calculation. - 2x speed-up of LAPJV for large matrices. -- Require R4.0; discontinue tests against R 3.6 and 4.0. +- Require R4.0; discontinue tests against R4.0. # TreeDist 2.9.2 (2025-01-11) diff --git a/src/day_1985.cpp b/src/day_1985.cpp index 89456f9d1..1ce115ffb 100644 --- a/src/day_1985.cpp +++ b/src/day_1985.cpp @@ -16,7 +16,7 @@ using TreeTools::ct_max_leaves; #include /* for log2(), ceil() */ #include /* for unique_ptr, make_unique */ -struct StackEntry { int16 L, R, N, W; }; +struct StackEntry { int32 L, R, N, W; }; // COMCLUSTER computes a strict consensus tree in O(knn). // COMCLUST requires O(kn). @@ -24,15 +24,16 @@ struct StackEntry { int16 L, R, N, W; }; // [[Rcpp::export]] int COMCLUST(List trees) { - int16 v = 0, w = 0; - int16 L, R, N, W; - int16 L_i, R_i, N_i, W_i; + int32 v = 0; + int32 w = 0; + int32 L, R, N, W; + int32 L_i, R_i, N_i, W_i; ClusterTable X(List(trees(0))); - std::array S; + std::array S; - for (int16 i = 1; i != trees.length(); i++) { - int16 Spos = 0; // Empty the stack S + for (int32 i = 1; i != trees.length(); i++) { + int32 Spos = 0; // Empty the stack S X.CLEAR(); ClusterTable Ti(List(trees(i))); @@ -77,18 +78,24 @@ int COMCLUST(List trees) { double consensus_info(const List trees, const LogicalVector phylo, const NumericVector p) { - int16 v = 0, w = 0, - L, R, N, W, - L_j, R_j, N_j, W_j - ; - const int16 n_trees = trees.length(); + int32 v = 0; + int32 w = 0; + int32 L; + int32 R; + int32 N; + int32 W; + int32 L_j; + int32 R_j; + int32 N_j; + int32 W_j; + const int32 n_trees = trees.length(); std::vector tables; if (std::size_t(n_trees) > tables.max_size()) { Rcpp::stop("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE } tables.reserve(n_trees); - for (int16 i = n_trees; i--; ) { + for (int32 i = n_trees; i--; ) { tables.emplace_back(ClusterTable(List(trees(i)))); } @@ -97,39 +104,38 @@ double consensus_info(const List trees, const LogicalVector phylo, } else if (p[0] < 0.5) { Rcpp::stop("p must be >= 0.5 in consensus_info()"); } - const int16 - n_tip = tables[0].N(), - thresh = p[0] <= 0.5 ? + const int32 n_tip = tables[0].N(); + const int32 thresh = p[0] <= 0.5 ? (n_trees / 2) + 1 : // Splits must occur in MORE THAN 0.5 to be in majority. - std::ceil(p[0] * n_trees), - must_occur_before = 1 + n_trees - thresh - ; + std::ceil(p[0] * n_trees); + const int32 must_occur_before = 1 + n_trees - thresh; const bool phylo_info = phylo[0]; - std::array S; - std::array split_count; + std::array S; + std::array split_count; double info = 0; const std::size_t ntip_3 = n_tip - 3; // All clades in p consensus must occur in first (1-p) of trees. - for (int16 i = 0; i != must_occur_before; i++) { + for (int32 i = 0; i < must_occur_before; ++i) { if (tables[i].NOSWX(ntip_3)) { - continue; + continue; } - std::vector split_size(n_tip); + std::vector split_size(n_tip); std::fill(split_count.begin(), split_count.begin() + n_tip, 1); - for (int16 j = i + 1; j != n_trees; j++) { + for (int32 j = i + 1; j < n_trees; ++j) { tables[i].CLEAR(); tables[j].TRESET(); tables[j].READT(&v, &w); - int16 j_pos = 0, Spos = 0; // Empty the stack S + int32 j_pos = 0; + int32 Spos = 0; // Empty the stack S do { if (IS_LEAF(v)) { @@ -177,8 +183,8 @@ double consensus_info(const List trees, const LogicalVector phylo, } while (v); } - int16 splits_found = 0; - for (int16 k = n_tip; k--; ) { + int32 splits_found = 0; + for (int32 k = n_tip; k--; ) { if (split_count[k] >= thresh) { ++splits_found; if (phylo_info) { @@ -231,8 +237,9 @@ IntegerVector robinson_foulds_all_pairs(List tables) { for (int j = i + 1; j < n_trees; ++j) { - int16 v, w; - int16 n_shared = 0; + int32 v; + int32 w; + int32 n_shared = 0; ClusterTable* Tj = tbl[j]; @@ -250,18 +257,18 @@ IntegerVector robinson_foulds_all_pairs(List tables) { } else { ASSERT(S_top > S_entries.data()); const StackEntry& entry = *--S_top; - int16 L = entry.L; - int16 R = entry.R; - int16 N = entry.N; - const int16 W_i = entry.W; - int16 W = 1 + W_i; + int32 L = entry.L; + int32 R = entry.R; + int32 N = entry.N; + const int32 W_i = entry.W; + int32 W = 1 + W_i; w -= W_i; if (w) { // Unroll first iteration - common case ASSERT(S_top > S_entries.data()); const StackEntry& entry = *--S_top; - const int16 W_i = entry.W; + const int32 W_i = entry.W; L = std::min(L, entry.L); // Faster than ternary operator R = std::max(R, entry.R); @@ -272,7 +279,7 @@ IntegerVector robinson_foulds_all_pairs(List tables) { while (w) { ASSERT(S_top > S_entries.data()); const StackEntry& entry = *--S_top; - const int16 W_i = entry.W; + const int32 W_i = entry.W; L = std::min(L, entry.L); R = std::max(R, entry.R); diff --git a/src/information.h b/src/information.h index 67ebd625c..3523ff56e 100644 --- a/src/information.h +++ b/src/information.h @@ -43,9 +43,9 @@ __attribute__((constructor)) } } -inline double split_phylo_info (const int16 n_in, const int16 *n_tip, - const double p) { - const int16 n_out = *n_tip - n_in; +inline double split_phylo_info(const int32 n_in, const int32 *n_tip, + const double p) { + const int32 n_out = *n_tip - n_in; assert(p > 0); assert(p <= 1); assert(n_in > 1); @@ -53,14 +53,12 @@ inline double split_phylo_info (const int16 n_in, const int16 *n_tip, if (p == 1) { return (l2unrooted[*n_tip] - l2rooted[n_in] - l2rooted[n_out]); } else { - const double - q = 1 - p, - l2n = l2unrooted[*n_tip], - l2n_consistent = l2rooted[n_in] + l2rooted[n_out], - l2p_consistent = l2n_consistent - l2n, - l2p_inconsistent = log2(-expm1(l2p_consistent * log_2)), - l2n_inconsistent = l2p_inconsistent + l2n - ; + const double q = 1 - p; + const double l2n = l2unrooted[*n_tip]; + const double l2n_consistent = l2rooted[n_in] + l2rooted[n_out]; + const double l2p_consistent = l2n_consistent - l2n; + const double l2p_inconsistent = log2(-expm1(l2p_consistent * log_2)); + const double l2n_inconsistent = l2p_inconsistent + l2n; return(l2n + p * (log2(p) - l2n_consistent) + @@ -68,9 +66,9 @@ inline double split_phylo_info (const int16 n_in, const int16 *n_tip, } } -inline double split_clust_info (const int16 n_in, const int16 *n_tip, - const double p) { - const int16 n_out = *n_tip - n_in; +inline double split_clust_info(const int32 n_in, const int32 *n_tip, + const double p) { + const int32 n_out = *n_tip - n_in; assert(p > 0); assert(p <= 1); assert(n_in > 1); diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp index bc560f087..2fe31d9b3 100644 --- a/src/tree_distances.cpp +++ b/src/tree_distances.cpp @@ -63,12 +63,11 @@ List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y, if (x.cols() != y.cols()) { Rcpp::stop("Input splits must address same number of tips."); } - TreeDist::check_ntip(nTip[0]); const SplitList a(x), b(y); - const int16 last_bin = a.n_bins - 1; - const int16 n_tips = int16(nTip[0]); - const int16 unset_tips = (n_tips % SL_BIN_SIZE) ? + const int32 last_bin = a.n_bins - 1; + const int32 n_tips = int32(nTip[0]); + const int32 unset_tips = (n_tips % SL_BIN_SIZE) ? SL_BIN_SIZE - n_tips % SL_BIN_SIZE : 0; const splitbit unset_mask = ALL_ONES >> unset_tips; cost score = 0; @@ -76,27 +75,27 @@ List cpp_robinson_foulds_distance(const RawMatrix &x, const RawMatrix &y, grf_match matching(a.n_splits, NA_INTEGER); splitbit b_complement[SL_MAX_SPLITS][SL_MAX_BINS]; - for (int16 i = b.n_splits; i--; ) { - for (int16 bin = last_bin; bin--; ) { + for (int32 i = b.n_splits; i--; ) { + for (int32 bin = last_bin; bin--; ) { b_complement[i][bin] = ~b.state[i][bin]; } b_complement[i][last_bin] = b.state[i][last_bin] ^ unset_mask; } - for (int16 ai = a.n_splits; ai--; ) { - for (int16 bi = b.n_splits; bi--; ) { + for (int32 ai = a.n_splits; ai--; ) { + for (int32 bi = b.n_splits; bi--; ) { bool all_match = true; bool all_complement = true; - for (int16 bin = 0; bin < a.n_bins; ++bin) { + for (int32 bin = 0; bin < a.n_bins; ++bin) { if ((a.state[ai][bin] != b.state[bi][bin])) { all_match = false; break; } } if (!all_match) { - for (int16 bin = 0; bin < a.n_bins; ++bin) { + for (int32 bin = 0; bin < a.n_bins; ++bin) { if (a.state[ai][bin] != b_complement[bi][bin]) { all_complement = false; break; diff --git a/tests/testthat/test-day_1985.cpp.r b/tests/testthat/test-day_1985.cpp.r index 8f25f0d26..8e640ace0 100644 --- a/tests/testthat/test-day_1985.cpp.r +++ b/tests/testthat/test-day_1985.cpp.r @@ -1,9 +1,7 @@ -test_that("Day 1985 overflow", { +test_that("Day 1985 does not overflow", { bigTree <- PectinateTree(2^14 + 1) - expect_error(TreeTools::as.ClusterTable(bigTree), - "Tree has too many leaves. Contact the .TreeTools. maintainer") - expect_error(RobinsonFoulds(list(bigTree, bigTree)), - "Tree has too many leaves. Contact the .TreeTools. maintainer") + expect_no_error(TreeTools::as.ClusterTable(bigTree)) + expect_equal(RobinsonFoulds(list(bigTree, bigTree))[[1]], 0) }) test_that("Day 1985 examples", { From 67154318d690f6476fc1c28d3edfa8b5296290aa Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 09:24:38 +0000 Subject: [PATCH 2/8] Update DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index beccf1a3e..3ae99d2dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -80,6 +80,7 @@ Suggests: LinkingTo: Rcpp, TreeTools (>= 1.16.1), +Remotes: ms609/TreeTools RdMacros: Rdpack VignetteBuilder: knitr Config/Needs/app/optional: uwot From 459d684a09b096c7af3dc42ccdc5d5ecf01a2e41 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 09:39:26 +0000 Subject: [PATCH 3/8] Reword --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f41c29b43..cd43b4f60 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # TreeDist 2.11.1.9001 (2026-02-05) -- Use int32 to support larger trees, per TreeTools v2.1.0. +- Support larger trees by updating some functions to use 32-bit integers, per TreeTools v2.1.0. # TreeDist 2.11.1.9000 (2025-11-13) From 54879438d5878e6ddcd1a6b7d003b8bf2b2de630 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:02:23 +0000 Subject: [PATCH 4/8] -CT_MAX_LEAVES --- src/information.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/information.h b/src/information.h index 3523ff56e..f15dac9d1 100644 --- a/src/information.h +++ b/src/information.h @@ -2,8 +2,6 @@ #define _TREEDIST_INFO_H #include /* for log2() */ -#include /* for CT_MAX_LEAVES */ - #include "ints.h" /* for int16 */ constexpr int_fast32_t LOG_MAX = 2048; @@ -15,7 +13,10 @@ void compute_log2_table() { } } -constexpr int_fast32_t FACT_MAX = CT_MAX_LEAVES + CT_MAX_LEAVES + 5 + 1; +// 16383 is a legacy value from TreeTools v2.0.0 +// TODO consider increasing; or support more by calculation. +constexpr int_fast32_t CT_MAX_LEAVES = 16383; +constexpr int_fast32_t FACT_MAX = (CT_MAX_LEAVES * 2) + 5 + 1; constexpr double log_2 = 0.6931471805599452862268; double ldfact[FACT_MAX]; @@ -45,6 +46,9 @@ __attribute__((constructor)) inline double split_phylo_info(const int32 n_in, const int32 *n_tip, const double p) { + if (*n_tip > CT_MAX_LEAVES) { + Rcpp::stop("This many leaves are not yet supported."); + } const int32 n_out = *n_tip - n_in; assert(p > 0); assert(p <= 1); @@ -68,6 +72,9 @@ inline double split_phylo_info(const int32 n_in, const int32 *n_tip, inline double split_clust_info(const int32 n_in, const int32 *n_tip, const double p) { + if (*n_tip > CT_MAX_LEAVES) { + Rcpp::stop("This many leaves are not yet supported."); + } const int32 n_out = *n_tip - n_in; assert(p > 0); assert(p <= 1); From 1df69eddf4c47365ec32514847519a95e5b15e91 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:02:32 +0000 Subject: [PATCH 5/8] pass by value, not reference --- src/day_1985.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/day_1985.cpp b/src/day_1985.cpp index 1ce115ffb..1bfab7d62 100644 --- a/src/day_1985.cpp +++ b/src/day_1985.cpp @@ -159,7 +159,7 @@ double consensus_info(const List trees, const LogicalVector phylo, // Split has already been counted; next! } else { if (N == R - L + 1) { // L..R is contiguous, and must be tested - if (tables[i].CLUSTONL(&L, &R)) { + if (tables[i].CLUSTONL(L, R)) { tables[j].SETSWX(j_pos); assert(L > 0); ++split_count[L - 1]; @@ -167,7 +167,7 @@ double consensus_info(const List trees, const LogicalVector phylo, split_size[L - 1] = N; } assert(split_size[L - 1] > 0); - } else if (tables[i].CLUSTONR(&L, &R)) { + } else if (tables[i].CLUSTONR(L, R)) { tables[j].SETSWX(j_pos); assert(R > 0); ++split_count[R - 1]; From aa338f5a6e3fcf04bd88145a5b642e6b0015eadb Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:38:49 +0000 Subject: [PATCH 6/8] don't need arm and x64 --- .github/workflows/R-CMD-check.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 76cfdee75..247aab813 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -54,8 +54,6 @@ jobs: rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"} - {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} - - {os: ubuntu-24.04, r: 'release', - rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} - {os: ubuntu-latest, r: 'devel', rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} From c4c56e5100b08c844793b4fcae6c29167a6cbb34 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:39:37 +0000 Subject: [PATCH 7/8] pandoc sorted by deps --- .github/workflows/R-CMD-check.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 247aab813..9cb64cabd 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -117,9 +117,6 @@ jobs: uwot=?ignore-before-r=4.4.0 needs: | check - - - name: Set up pandoc - uses: r-lib/actions/setup-pandoc@v2 - name: Check package uses: r-lib/actions/check-r-package@v2 From 33d7afc445a2be875f7520e55adeef4d35e8a9b6 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Thu, 5 Feb 2026 10:43:26 +0000 Subject: [PATCH 8/8] Don't spambard --- .github/workflows/R-CMD-check.yml | 39 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 9cb64cabd..2b1f4d353 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -132,16 +132,35 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: 164, - body: 'Scheduled workflow has failed: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}' - }); - - await github.rest.issues.update({ + const issue_number = 164; + + const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: 164, - state: 'open' + issue_number: issue_number, + per_page: 3 }); + + const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000); + const recentFailureNotified = comments.some(comment => + new Date(comment.created_at) > oneHourAgo && + comment.body.includes('Scheduled workflow has failed') + ); + + if (recentFailureNotified) { + console.log("Recently notified; don't bombard"); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue_number, + body: 'Scheduled workflow has failed: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}' + }); + + await github.rest.issues.update({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issue_number, + state: 'open' + }); + }