Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions .github/workflows/R-CMD-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ jobs:
rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"}
- {os: ubuntu-24.04-arm, r: "release",
rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
- {os: ubuntu-24.04, r: 'release',
rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}
- {os: ubuntu-latest, r: 'devel',
rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"}

Expand Down Expand Up @@ -119,9 +117,6 @@ jobs:
uwot=?ignore-before-r=4.4.0
needs: |
check

- name: Set up pandoc
uses: r-lib/actions/setup-pandoc@v2

- name: Check package
uses: r-lib/actions/check-r-package@v2
Expand All @@ -137,16 +132,35 @@ jobs:
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: 164,
body: 'Scheduled workflow has failed: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
});

await github.rest.issues.update({
const issue_number = 164;

const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: 164,
state: 'open'
issue_number: issue_number,
per_page: 3
});

const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000);
const recentFailureNotified = comments.some(comment =>
new Date(comment.created_at) > oneHourAgo &&
comment.body.includes('Scheduled workflow has failed')
);

if (recentFailureNotified) {
console.log("Recently notified; don't bombard");
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issue_number,
body: 'Scheduled workflow has failed: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
});

await github.rest.issues.update({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issue_number,
state: 'open'
});
}
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: TreeDist
Type: Package
Title: Calculate and Map Distances Between Phylogenetic Trees
Version: 2.11.1.9000
Version: 2.11.1.9001
Authors@R: c(person("Martin R.", "Smith",
email = "martin.smith@durham.ac.uk",
role = c("aut", "cre", "cph", "prg"),
Expand Down Expand Up @@ -50,7 +50,7 @@ Imports:
Rdpack (>= 0.7),
shiny,
shinyjs,
TreeTools (>= 1.16),
TreeTools (>= 2.0.0.9002),
Suggests:
bookdown,
cluster,
Expand Down Expand Up @@ -80,6 +80,7 @@ Suggests:
LinkingTo:
Rcpp,
TreeTools (>= 1.16.1),
Remotes: ms609/TreeTools
RdMacros: Rdpack
VignetteBuilder: knitr
Config/Needs/app/optional: uwot
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# TreeDist 2.11.1.9001 (2026-02-05)

- Support larger trees by updating some functions to use 32-bit integers, per TreeTools v2.1.0.


# TreeDist 2.11.1.9000 (2025-11-13)

- `AHMI()` now returns negative values (previously zeroed in error).
Expand Down Expand Up @@ -39,7 +44,7 @@ This was **fixed in v2.11.0**.
- Faster tree distance calculation.
- 2x speed-up of LAPJV for large matrices.

- Require R4.0; discontinue tests against R 3.6 and 4.0.
- Require R4.0; discontinue tests against R4.0.


# TreeDist 2.9.2 (2025-01-11)
Expand Down
85 changes: 46 additions & 39 deletions src/day_1985.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,24 @@ using TreeTools::ct_max_leaves;
#include <cmath> /* for log2(), ceil() */
#include <memory> /* for unique_ptr, make_unique */

struct StackEntry { int16 L, R, N, W; };
struct StackEntry { int32 L, R, N, W; };

// COMCLUSTER computes a strict consensus tree in O(knn).
// COMCLUST requires O(kn).
// trees is a list of objects of class phylo.
// [[Rcpp::export]]
int COMCLUST(List trees) {

int16 v = 0, w = 0;
int16 L, R, N, W;
int16 L_i, R_i, N_i, W_i;
int32 v = 0;
int32 w = 0;
int32 L, R, N, W;
int32 L_i, R_i, N_i, W_i;

ClusterTable X(List(trees(0)));
std::array<int16, TreeTools::ct_max_leaves> S;
std::array<int32, TreeTools::ct_max_leaves> S;

for (int16 i = 1; i != trees.length(); i++) {
int16 Spos = 0; // Empty the stack S
for (int32 i = 1; i != trees.length(); i++) {
int32 Spos = 0; // Empty the stack S

X.CLEAR();
ClusterTable Ti(List(trees(i)));
Expand Down Expand Up @@ -77,18 +78,24 @@ int COMCLUST(List trees) {
double consensus_info(const List trees, const LogicalVector phylo,
const NumericVector p) {

int16 v = 0, w = 0,
L, R, N, W,
L_j, R_j, N_j, W_j
;
const int16 n_trees = trees.length();
int32 v = 0;
int32 w = 0;
int32 L;
int32 R;
int32 N;
int32 W;
int32 L_j;
int32 R_j;
int32 N_j;
int32 W_j;
const int32 n_trees = trees.length();

std::vector<ClusterTable> tables;
if (std::size_t(n_trees) > tables.max_size()) {
Rcpp::stop("Not enough memory available to compute consensus of so many trees"); // LCOV_EXCL_LINE
}
tables.reserve(n_trees);
for (int16 i = n_trees; i--; ) {
for (int32 i = n_trees; i--; ) {
tables.emplace_back(ClusterTable(List(trees(i))));
}

Expand All @@ -97,39 +104,38 @@ double consensus_info(const List trees, const LogicalVector phylo,
} else if (p[0] < 0.5) {
Rcpp::stop("p must be >= 0.5 in consensus_info()");
}
const int16
n_tip = tables[0].N(),
thresh = p[0] <= 0.5 ?
const int32 n_tip = tables[0].N();
const int32 thresh = p[0] <= 0.5 ?
(n_trees / 2) + 1 : // Splits must occur in MORE THAN 0.5 to be in majority.
std::ceil(p[0] * n_trees),
must_occur_before = 1 + n_trees - thresh
;
std::ceil(p[0] * n_trees);
const int32 must_occur_before = 1 + n_trees - thresh;

const bool phylo_info = phylo[0];

std::array<int16, TreeTools::ct_stack_size * TreeTools::ct_max_leaves> S;
std::array<int16, TreeTools::ct_max_leaves> split_count;
std::array<int32, TreeTools::ct_stack_size * TreeTools::ct_max_leaves> S;
std::array<int32, TreeTools::ct_max_leaves> split_count;

double info = 0;

const std::size_t ntip_3 = n_tip - 3;
// All clades in p consensus must occur in first (1-p) of trees.
for (int16 i = 0; i != must_occur_before; i++) {
for (int32 i = 0; i < must_occur_before; ++i) {
if (tables[i].NOSWX(ntip_3)) {
continue;
continue;
}

std::vector<int16> split_size(n_tip);
std::vector<int32> split_size(n_tip);
std::fill(split_count.begin(), split_count.begin() + n_tip, 1);

for (int16 j = i + 1; j != n_trees; j++) {
for (int32 j = i + 1; j < n_trees; ++j) {

tables[i].CLEAR();

tables[j].TRESET();
tables[j].READT(&v, &w);

int16 j_pos = 0, Spos = 0; // Empty the stack S
int32 j_pos = 0;
int32 Spos = 0; // Empty the stack S

do {
if (IS_LEAF(v)) {
Expand All @@ -153,15 +159,15 @@ double consensus_info(const List trees, const LogicalVector phylo,
// Split has already been counted; next!
} else {
if (N == R - L + 1) { // L..R is contiguous, and must be tested
if (tables[i].CLUSTONL(&L, &R)) {
if (tables[i].CLUSTONL(L, R)) {
tables[j].SETSWX(j_pos);
assert(L > 0);
++split_count[L - 1];
if (!split_size[L - 1]) {
split_size[L - 1] = N;
}
assert(split_size[L - 1] > 0);
} else if (tables[i].CLUSTONR(&L, &R)) {
} else if (tables[i].CLUSTONR(L, R)) {
tables[j].SETSWX(j_pos);
assert(R > 0);
++split_count[R - 1];
Expand All @@ -177,8 +183,8 @@ double consensus_info(const List trees, const LogicalVector phylo,
} while (v);
}

int16 splits_found = 0;
for (int16 k = n_tip; k--; ) {
int32 splits_found = 0;
for (int32 k = n_tip; k--; ) {
if (split_count[k] >= thresh) {
++splits_found;
if (phylo_info) {
Expand Down Expand Up @@ -231,8 +237,9 @@ IntegerVector robinson_foulds_all_pairs(List tables) {

for (int j = i + 1; j < n_trees; ++j) {

int16 v, w;
int16 n_shared = 0;
int32 v;
int32 w;
int32 n_shared = 0;

ClusterTable* Tj = tbl[j];

Expand All @@ -250,18 +257,18 @@ IntegerVector robinson_foulds_all_pairs(List tables) {
} else {
ASSERT(S_top > S_entries.data());
const StackEntry& entry = *--S_top;
int16 L = entry.L;
int16 R = entry.R;
int16 N = entry.N;
const int16 W_i = entry.W;
int16 W = 1 + W_i;
int32 L = entry.L;
int32 R = entry.R;
int32 N = entry.N;
const int32 W_i = entry.W;
int32 W = 1 + W_i;

w -= W_i;

if (w) { // Unroll first iteration - common case
ASSERT(S_top > S_entries.data());
const StackEntry& entry = *--S_top;
const int16 W_i = entry.W;
const int32 W_i = entry.W;

L = std::min(L, entry.L); // Faster than ternary operator
R = std::max(R, entry.R);
Expand All @@ -272,7 +279,7 @@ IntegerVector robinson_foulds_all_pairs(List tables) {
while (w) {
ASSERT(S_top > S_entries.data());
const StackEntry& entry = *--S_top;
const int16 W_i = entry.W;
const int32 W_i = entry.W;

L = std::min(L, entry.L);
R = std::max(R, entry.R);
Expand Down
39 changes: 22 additions & 17 deletions src/information.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
#define _TREEDIST_INFO_H

#include <cmath> /* for log2() */
#include <TreeTools/ClusterTable.h> /* for CT_MAX_LEAVES */

#include "ints.h" /* for int16 */

constexpr int_fast32_t LOG_MAX = 2048;
Expand All @@ -15,7 +13,10 @@ void compute_log2_table() {
}
}

constexpr int_fast32_t FACT_MAX = CT_MAX_LEAVES + CT_MAX_LEAVES + 5 + 1;
// 16383 is a legacy value from TreeTools v2.0.0
// TODO consider increasing; or support more by calculation.
constexpr int_fast32_t CT_MAX_LEAVES = 16383;
constexpr int_fast32_t FACT_MAX = (CT_MAX_LEAVES * 2) + 5 + 1;
constexpr double log_2 = 0.6931471805599452862268;

double ldfact[FACT_MAX];
Expand Down Expand Up @@ -43,34 +44,38 @@ __attribute__((constructor))
}
}

inline double split_phylo_info (const int16 n_in, const int16 *n_tip,
const double p) {
const int16 n_out = *n_tip - n_in;
inline double split_phylo_info(const int32 n_in, const int32 *n_tip,
const double p) {
if (*n_tip > CT_MAX_LEAVES) {
Rcpp::stop("This many leaves are not yet supported.");
}
const int32 n_out = *n_tip - n_in;
assert(p > 0);
assert(p <= 1);
assert(n_in > 1);
assert(n_out > 1);
if (p == 1) {
return (l2unrooted[*n_tip] - l2rooted[n_in] - l2rooted[n_out]);
} else {
const double
q = 1 - p,
l2n = l2unrooted[*n_tip],
l2n_consistent = l2rooted[n_in] + l2rooted[n_out],
l2p_consistent = l2n_consistent - l2n,
l2p_inconsistent = log2(-expm1(l2p_consistent * log_2)),
l2n_inconsistent = l2p_inconsistent + l2n
;
const double q = 1 - p;
const double l2n = l2unrooted[*n_tip];
const double l2n_consistent = l2rooted[n_in] + l2rooted[n_out];
const double l2p_consistent = l2n_consistent - l2n;
const double l2p_inconsistent = log2(-expm1(l2p_consistent * log_2));
const double l2n_inconsistent = l2p_inconsistent + l2n;

return(l2n +
p * (log2(p) - l2n_consistent) +
q * (log2(q) - l2n_inconsistent));
}
}

inline double split_clust_info (const int16 n_in, const int16 *n_tip,
const double p) {
const int16 n_out = *n_tip - n_in;
inline double split_clust_info(const int32 n_in, const int32 *n_tip,
const double p) {
if (*n_tip > CT_MAX_LEAVES) {
Rcpp::stop("This many leaves are not yet supported.");
}
const int32 n_out = *n_tip - n_in;
assert(p > 0);
assert(p <= 1);
assert(n_in > 1);
Expand Down
Loading
Loading