Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ build
.cache
.ccls-cache
__pycache__
/.vscode
59 changes: 59 additions & 0 deletions R/collapse_short_branches.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#! /usr/bin/env Rscript
main = function() {
message("COLLAPSE SHORT BRANCH LENGTHS")

.libPaths(
c(
"/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
.libPaths()
)
)

args <- commandArgs(trailingOnly = TRUE)

inphy.txt = args[[1]]
tol = as.numeric(args[[2]])

# file names -------------------------------------------------------------
inphy = readLines(inphy.txt, n = 1)

outphy = paste0(
paste0(
fs::path_ext_remove(inphy),
"-col."
),
fs::path_ext(inphy)
)

outphy.txt = paste0(
paste0(
fs::path_ext_remove(inphy.txt),
"-col."
),
fs::path_ext(inphy.txt)
)

if (fs::file_exists(outphy) & fs::file_exists(outphy.txt)) {
message(
outphy,
" and ",
outphy.txt,
" already exist! Continuing without re-collapsing branches"
)
return(0)
}

message("Reading tree")
phy = ape::read.tree(file = inphy)

message("Collapsing branches shorter than ", tol)
phy_col = ape::di2multi(phy, tol = tol)
message(ape::Nnode(phy), " -> ", ape::Nnode(phy_col))

message("Writing tree")
ape::write.tree(phy_col, file = outphy)
message("Writing ", outphy.txt)
writeLines(outphy, outphy.txt)
}

main()
71 changes: 71 additions & 0 deletions R/rotate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#! /usr/bin/env Rscript
main = function() {
message("ROTATE TREE")

.libPaths(
c(
"/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
.libPaths()
)
)

args <- commandArgs(trailingOnly = TRUE)

inphy.txt = args[[1]]
infasta = args[[2]]

# file names
inphy = readLines(inphy.txt, n = 1)

outphy = paste0(
paste0(
fs::path_ext_remove(inphy),
"-rot."
),
fs::path_ext(inphy)
)

outphy.txt = paste0(
paste0(
fs::path_ext_remove(inphy.txt),
"-rot."
),
fs::path_ext(inphy.txt)
)

if (fs::file_exists(outphy) & fs::file_exists(outphy.txt)) {
message(
outphy,
" and ",
outphy.txt,
" already exist! Continuing without re-rotating tree."
)
return(0)
}

message("Reading fasta")
fa = ape::read.dna(
file = infasta,
format = "fasta",
as.character = T,
as.matrix = F
)

outgroup = names(fa)[[1]]

message("Reading ", inphy)
t = ape::read.tree(file = inphy)

message("Rooting tree at ", outgroup)
t = ape::root(t, outgroup = outgroup)
message("Ladderizing tree")
t = ape::ladderize(t, right = F)

message("Writing tree")
ape::write.tree(t, file = outphy)
message("Writing ", outphy.txt)
writeLines(outphy, outphy.txt)
return(0)
}

main()
33 changes: 33 additions & 0 deletions R/set_min_branch_lengths.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /usr/bin/env Rscript

main = function() {
message("SET MINIMUM BRANCH LENGTHS")

.libPaths(
c(
"/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
.libPaths()
)
)

args <- commandArgs(trailingOnly = TRUE)

infile = args[[1]]
outfile = args[[2]]
min_bl = as.numeric(args[[3]])

if (fs::file_exists(outfile)) {
message(outfile, " already exists. Not remaking.")
}

message("Reading tree")
t = ape::read.tree(file = infile)

message("Setting minimum branch lengths to ", min_bl)
t$edge.length[t$edge.length < min_bl] = min_bl

message("Writing tree")
ape::write.tree(phy = t, file = outfile)
}

main()
171 changes: 171 additions & 0 deletions R/tree_trim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#! /usr/bin/env Rscript

main = function() {
message("TRIM TREE")

.libPaths(
c(
"/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
.libPaths()
)
)

library(seqUtils, quietly = T, warn.conflicts = F, verbose = F)
library(convergence, quietly = T, warn.conflicts = F, verbose = F)

library(treeio, quietly = T, warn.conflicts = F, verbose = F, )

library(purrr, quietly = T, warn.conflicts = F, verbose = F)
library(dplyr, quietly = T, warn.conflicts = F, verbose = F)
library(stringr, quietly = T, warn.conflicts = F, verbose = F)

convergence:::add_to_PATH("/home/sat65/miniforge3/envs/treebuild/bin") # usher

# args ------------------------------------------------------------
args <- commandArgs(trailingOnly = TRUE)

subtype = args[[1]]
inphy.txt = args[[2]]
infasta = args[[3]]

inphy = readLines(inphy.txt, n = 1)

outphy = paste0(
paste0(
fs::path_ext_remove(inphy),
"-trim."
),
fs::path_ext(inphy)
)

outphy.txt = paste0(
paste0(
fs::path_ext_remove(inphy.txt),
"-trim."
),
fs::path_ext(inphy.txt)
)

outfasta = paste0(
paste0(
fs::path_ext_remove(infasta),
"-trim."
),
fs::path_ext(infasta)
)

if (all(fs::file_exists(c(outphy, outphy.txt, outfasta)))) {
message("All output files already exist. Not remaking.")
}

# specify trim ----------------------------------------------------
if (subtype == "h3") {
trim = T
mutation = "223V"
required_mutations = list() # list(list(at = 100, to = "T"), list(at = ...))
forbidden_mutations = list()
min_tips = 15000
} else if (subtype == "h1") {
trim = F
mutation = NA
required_mutations = list()
forbidden_mutations = list()
min_tips = 0
} else if (subtype == "bvic") {
trim = T
mutation = "127T"
required_mutations = list()
forbidden_mutations = list()
min_tips = 15000
} else if (subtype == "byam") {
trim = F
mutation = NA
required_mutations = list()
forbidden_mutations = list()
min_tips = 0
} else {
stop("Invalid subtype ", subtype)
}

# read input files ------------------------------------------------------------

phy = ape::read.tree(file = inphy)
fa = seqUtils::fast_fasta(infasta)

# trim ------------------------------------------------------------
if (!trim) {
trimmed_phy = phy
trimmed_seqs = fa
} else {
tree_and_sequences = list(
tree = phy,
sequences = tibble::tibble(
Isolate_unique_identifier = names(fa),
dna_sequence = seqUtils::clean_sequences(unname(fa), type = "nt")
)
)

usher_tree_and_sequences = convergence::addASRusher(
tree_and_sequences,
nuc_ref = fa[1],
aa_ref = as.character(Biostrings::translate(Biostrings::DNAString(fa[1])))
)

usher_tree_and_sequences$tree_tibble$nd = c(
rep(1, ape::Ntip(usher_tree_and_sequences$tree)),
castor::count_tips_per_node(usher_tree_and_sequences$tree)
)

tree_tibble_mutation_occs = usher_tree_and_sequences$tree_tibble %>%
filter(
map_lgl(aa_mutations_nonsyn, ~mutation %in% str_sub(.x, 2)),
nd >= min_tips
) %>%
arrange(-nd)

for (r_m in required_mutations){
at = as.integer(r_m[["at"]])
to = r_m[["to"]]
tree_tibble_mutation_occs = tree_tibble_mutation_occs %>%
filter(substr(reconstructed_aa_sequence, at, at) == to)
}

for (f_m in required_mutations){
at = as.integer(f_m[["at"]])
to = f_m[["to"]]
tree_tibble_mutation_occs = tree_tibble_mutation_occs %>%
filter(substr(reconstructed_aa_sequence, at, at) != to)
}

if (nrow(tree_tibble_mutation_occs) != 1){
stop("No tree branches fulfilling specification found ")
}

ancestor = tree_tibble_mutation_occs$node[[1]]

trimmed_phy = castor::get_subtree_at_node(
usher_tree_and_sequences$tree,
ancestor - ape::Ntip(usher_tree_and_sequences$tree)
)$subtree

trimmed_phy = ape::ladderize(trimmed_phy, right = F)
trimmed_phy$edge.length = trimmed_phy$edge.length / mean(nchar(fa)) # per nt
trimmed_seqs = fa[names(fa) %in% trimmed_phy$tip.label]
}

message("Writing tree")
ape::write.tree(trimmed_phy, file = outphy)

message("Writing ", outphy.txt)
writeLines(outphy, outphy.txt)

message("Writing ", outfasta)
seqUtils::write_fast_fasta(
seqs = unname(trimmed_seqs),
names = names(trimmed_seqs),
path = outfasta
)
return(0)
}

main()
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

- install homebrew https://brew.sh
- brew install llvm ninja meson libomp cmake brotli zlib xz gnu-time catch2
- pip3 install mypy

## Installing dependencies on Ubuntu

Expand Down
2 changes: 0 additions & 2 deletions bin/seqdb-update
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ def main(args: argparse.Namespace):
# only aligned are stored
ae_backend.raw_sequence.calculate_hash(sequence)
sequences_by_subtype.setdefault(sequence.type_subtype, []).append(sequence)
else:
print(f"{sequence.name:40s} {sequence.aa}")
except Exception as err:
print(f"> {err}: {sequence.name}\n {sequence.aa}")
reader.context.messages_from_backend(sequence_messages)
Expand Down
2 changes: 1 addition & 1 deletion cc/tree/newick.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ namespace ae::tree::newick
{
static constexpr auto whitespace = dsl::ascii::space / dsl::ascii::newline;
static constexpr auto rule = dsl::p<internal_node> + dsl::semicolon + dsl::eof;
static constexpr auto max_recursion_depth = 1000;
static constexpr auto max_recursion_depth = 10000;

static constexpr auto value = lexy::forward<result_t>;
};
Expand Down
7 changes: 2 additions & 5 deletions cc/whocc/xlsx/sheet-extractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static const std::regex re_VIDRL_serum_name{"^(?:[AB]/)?([A-Z][A-Z ]+)/?([0-9]+)
static const std::regex re_VIDRL_serum_id{"^(" pattern_VIDRL_serum_id "|" pattern_CRICK_serum_id ")$", regex_icase};
static const std::regex re_VIDRL_serum_id_with_days{"^[AF][0-9][0-9][0-9][0-9]-[0-9]+D$", regex_icase};

static const std::regex re_human_who_serum{R"(^\s*(.*(HUMAN|WHO|NORMAL)|GOAT|POST? VAX|VAX POOL|SERA POOL)\b)", regex_icase}; // "POST VAX": VIDRL H3 HI 2021, "HUMAN VAX": VIDRL H3 HI 2022
static const std::regex re_human_who_serum{R"(^\s*(.*(HUMAN|WHO|NORMAL)|GOAT|POST? VAX|VAX POOL)\b)", regex_icase}; // "POST VAX": VIDRL H3 HI 2021, "HUMAN VAX": VIDRL H3 HI 2022

#pragma GCC diagnostic pop

Expand Down Expand Up @@ -339,16 +339,13 @@ std::string ae::xlsx::v1::Extractor::titer(size_t ag_no, size_t sr_no) const
void ae::xlsx::v1::Extractor::find_titers(warn_if_not_found winf)
{
std::vector<std::pair<nrow_t, range<ncol_t>>> rows;
AD_DEBUG("Sheet {} rows:{}", sheet().name(), sheet().number_of_rows());
// AD_DEBUG("Sheet {}", sheet().name());
for (nrow_t row{0}; row < sheet().number_of_rows(); ++row) {
auto titers = sheet().titer_range(row);
AD_DEBUG("Row:{} titers: ({} valid:{}) {}:{}", row, titers.length(), titers.valid(), titers.first, titers.second);
adjust_titer_range(row, titers);
AD_DEBUG("Row:{} titers: ({} valid:{}) {}:{}", row, titers.length(), titers.valid(), titers.first, titers.second);
if (titers.valid() && titers.length() > 2 && titers.first > ncol_t{0} && valid_titer_row(row, titers))
rows.emplace_back(row, std::move(titers));
}
AD_DEBUG("Rows with titers: {}", rows.size());

if (!ranges::all_of(rows, [&rows](const auto& en) { return en.second == rows[0].second; })) {
fmt::memory_buffer report; // fmt::format(rows, "{}", "\n "));
Expand Down
Loading