diff --git a/.gitignore b/.gitignore
index efcd6d3..12bb8e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ build
 .cache
 .ccls-cache
 __pycache__
+/.vscode
diff --git a/R/collapse_short_branches.R b/R/collapse_short_branches.R
new file mode 100644
index 0000000..e1f868e
--- /dev/null
+++ b/R/collapse_short_branches.R
@@ -0,0 +1,59 @@
+#! /usr/bin/env Rscript
+main = function() {
+  message("COLLAPSE SHORT BRANCH LENGTHS")
+
+  .libPaths(
+    c(
+      "/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
+      .libPaths()
+    )
+  )
+
+  args <- commandArgs(trailingOnly = TRUE)
+
+  inphy.txt = args[[1]]
+  tol = as.numeric(args[[2]])
+
+  # file names -------------------------------------------------------------
+  inphy = readLines(inphy.txt, n = 1)
+
+  outphy = paste0(
+    paste0(
+      fs::path_ext_remove(inphy),
+      "-col."
+    ),
+    fs::path_ext(inphy)
+  )
+
+  outphy.txt = paste0(
+    paste0(
+      fs::path_ext_remove(inphy.txt),
+      "-col."
+    ),
+    fs::path_ext(inphy.txt)
+  )
+
+  if (fs::file_exists(outphy) & fs::file_exists(outphy.txt)) {
+    message(
+      outphy,
+      " and ",
+      outphy.txt,
+      " already exist! Continuing without re-collapsing branches"
+    )
+    return(0)
+  }
+
+  message("Reading tree")
+  phy = ape::read.tree(file = inphy)
+
+  message("Collapsing branches shorter than ", tol)
+  phy_col = ape::di2multi(phy, tol = tol)
+  message(ape::Nnode(phy), " -> ", ape::Nnode(phy_col))
+
+  message("Writing tree")
+  ape::write.tree(phy_col, file = outphy)
+  message("Writing ", outphy.txt)
+  writeLines(outphy, outphy.txt)
+}
+
+main()
diff --git a/R/rotate.R b/R/rotate.R
new file mode 100644
index 0000000..e2d7d86
--- /dev/null
+++ b/R/rotate.R
@@ -0,0 +1,71 @@
+#! /usr/bin/env Rscript
+main = function() {
+  message("ROTATE TREE")
+
+  .libPaths(
+    c(
+      "/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
+      .libPaths()
+    )
+  )
+
+  args <- commandArgs(trailingOnly = TRUE)
+
+  inphy.txt = args[[1]]
+  infasta = args[[2]]
+
+  # file names
+  inphy = readLines(inphy.txt, n = 1)
+
+  outphy = paste0(
+    paste0(
+      fs::path_ext_remove(inphy),
+      "-rot."
+    ),
+    fs::path_ext(inphy)
+  )
+
+  outphy.txt = paste0(
+    paste0(
+      fs::path_ext_remove(inphy.txt),
+      "-rot."
+    ),
+    fs::path_ext(inphy.txt)
+  )
+
+  if (fs::file_exists(outphy) & fs::file_exists(outphy.txt)) {
+    message(
+      outphy,
+      " and ",
+      outphy.txt,
+      " already exist! Continuing without re-rotating tree."
+    )
+    return(0)
+  }
+
+  message("Reading fasta")
+  fa = ape::read.dna(
+    file = infasta,
+    format = "fasta",
+    as.character = T,
+    as.matrix = F
+  )
+
+  outgroup = names(fa)[[1]]
+
+  message("Reading ", inphy)
+  t = ape::read.tree(file = inphy)
+
+  message("Rooting tree at ", outgroup)
+  t = ape::root(t, outgroup = outgroup)
+  message("Ladderizing tree")
+  t = ape::ladderize(t, right = F)
+
+  message("Writing tree")
+  ape::write.tree(t, file = outphy)
+  message("Writing ", outphy.txt)
+  writeLines(outphy, outphy.txt)
+  return(0)
+}
+
+main()
diff --git a/R/set_min_branch_lengths.R b/R/set_min_branch_lengths.R
new file mode 100755
index 0000000..764afaa
--- /dev/null
+++ b/R/set_min_branch_lengths.R
@@ -0,0 +1,34 @@
+#! /usr/bin/env Rscript
+
+main = function() {
+  message("SET MINIMUM BRANCH LENGTHS")
+
+  .libPaths(
+    c(
+      "/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
+      .libPaths()
+    )
+  )
+
+  args <- commandArgs(trailingOnly = TRUE)
+
+  infile = args[[1]]
+  outfile = args[[2]]
+  min_bl = as.numeric(args[[3]])
+
+  if (fs::file_exists(outfile)) {
+    message(outfile, " already exists. Not remaking.")
+    return(0)
+  }
+
+  message("Reading tree")
+  t = ape::read.tree(file = infile)
+
+  message("Setting minimum branch lengths to ", min_bl)
+  t$edge.length[t$edge.length < min_bl] = min_bl
+
+  message("Writing tree")
+  ape::write.tree(phy = t, file = outfile)
+}
+
+main()
diff --git a/R/tree_trim.R b/R/tree_trim.R
new file mode 100644
index 0000000..723760f
--- /dev/null
+++ b/R/tree_trim.R
@@ -0,0 +1,172 @@
+#! /usr/bin/env Rscript
+
+main = function() {
+  message("TRIM TREE")
+
+  .libPaths(
+    c(
+      "/home/samt123/R/x86_64-pc-linux-gnu-library/4.2",
+      .libPaths()
+    )
+  )
+
+  library(seqUtils, quietly = T, warn.conflicts = F, verbose = F)
+  library(convergence, quietly = T, warn.conflicts = F, verbose = F)
+
+  library(treeio, quietly = T, warn.conflicts = F, verbose = F)
+
+  library(purrr, quietly = T, warn.conflicts = F, verbose = F)
+  library(dplyr, quietly = T, warn.conflicts = F, verbose = F)
+  library(stringr, quietly = T, warn.conflicts = F, verbose = F)
+
+  convergence:::add_to_PATH("/home/sat65/miniforge3/envs/treebuild/bin") # usher
+
+  # args ------------------------------------------------------------
+  args <- commandArgs(trailingOnly = TRUE)
+
+  subtype = args[[1]]
+  inphy.txt = args[[2]]
+  infasta = args[[3]]
+
+  inphy = readLines(inphy.txt, n = 1)
+
+  outphy = paste0(
+    paste0(
+      fs::path_ext_remove(inphy),
+      "-trim."
+    ),
+    fs::path_ext(inphy)
+  )
+
+  outphy.txt = paste0(
+    paste0(
+      fs::path_ext_remove(inphy.txt),
+      "-trim."
+    ),
+    fs::path_ext(inphy.txt)
+  )
+
+  outfasta = paste0(
+    paste0(
+      fs::path_ext_remove(infasta),
+      "-trim."
+    ),
+    fs::path_ext(infasta)
+  )
+
+  if (all(fs::file_exists(c(outphy, outphy.txt, outfasta)))) {
+    message("All output files already exist. Not remaking.")
+    return(0)
+  }
+
+  # specify trim ----------------------------------------------------
+  if (subtype == "h3") {
+    trim = T
+    mutation = "223V"
+    required_mutations = list() # list(list(at = 100, to = "T"), list(at = ...))
+    forbidden_mutations = list()
+    min_tips = 15000
+  } else if (subtype == "h1") {
+    trim = F
+    mutation = NA
+    required_mutations = list()
+    forbidden_mutations = list()
+    min_tips = 0
+  } else if (subtype == "bvic") {
+    trim = T
+    mutation = "127T"
+    required_mutations = list()
+    forbidden_mutations = list()
+    min_tips = 15000
+  } else if (subtype == "byam") {
+    trim = F
+    mutation = NA
+    required_mutations = list()
+    forbidden_mutations = list()
+    min_tips = 0
+  } else {
+    stop("Invalid subtype ", subtype)
+  }
+
+  # read input files ------------------------------------------------------------
+
+  phy = ape::read.tree(file = inphy)
+  fa = seqUtils::fast_fasta(infasta)
+
+  # trim ------------------------------------------------------------
+  if (!trim) {
+    trimmed_phy = phy
+    trimmed_seqs = fa
+  } else {
+    tree_and_sequences = list(
+      tree = phy,
+      sequences = tibble::tibble(
+        Isolate_unique_identifier = names(fa),
+        dna_sequence = seqUtils::clean_sequences(unname(fa), type = "nt")
+      )
+    )
+
+    usher_tree_and_sequences = convergence::addASRusher(
+      tree_and_sequences,
+      nuc_ref = fa[1],
+      aa_ref = as.character(Biostrings::translate(Biostrings::DNAString(fa[1])))
+    )
+
+    usher_tree_and_sequences$tree_tibble$nd = c(
+      rep(1, ape::Ntip(usher_tree_and_sequences$tree)),
+      castor::count_tips_per_node(usher_tree_and_sequences$tree)
+    )
+
+    tree_tibble_mutation_occs = usher_tree_and_sequences$tree_tibble %>%
+      filter(
+        map_lgl(aa_mutations_nonsyn, ~mutation %in% str_sub(.x, 2)),
+        nd >= min_tips
+      ) %>%
+      arrange(-nd)
+
+    for (r_m in required_mutations){
+      at = as.integer(r_m[["at"]])
+      to = r_m[["to"]]
+      tree_tibble_mutation_occs = tree_tibble_mutation_occs %>%
+        filter(substr(reconstructed_aa_sequence, at, at) == to)
+    }
+
+    for (f_m in forbidden_mutations){
+      at = as.integer(f_m[["at"]])
+      to = f_m[["to"]]
+      tree_tibble_mutation_occs = tree_tibble_mutation_occs %>%
+        filter(substr(reconstructed_aa_sequence, at, at) != to)
+    }
+
+    if (nrow(tree_tibble_mutation_occs) != 1){
+      stop(nrow(tree_tibble_mutation_occs), " tree branches fulfilling specification found; expected exactly 1")
+    }
+
+    ancestor = tree_tibble_mutation_occs$node[[1]]
+
+    trimmed_phy = castor::get_subtree_at_node(
+      usher_tree_and_sequences$tree,
+      ancestor - ape::Ntip(usher_tree_and_sequences$tree)
+    )$subtree
+
+    trimmed_phy = ape::ladderize(trimmed_phy, right = F)
+    trimmed_phy$edge.length = trimmed_phy$edge.length / mean(nchar(fa)) # per nt
+    trimmed_seqs = fa[names(fa) %in% trimmed_phy$tip.label]
+  }
+
+  message("Writing tree")
+  ape::write.tree(trimmed_phy, file = outphy)
+
+  message("Writing ", outphy.txt)
+  writeLines(outphy, outphy.txt)
+
+  message("Writing ", outfasta)
+  seqUtils::write_fast_fasta(
+    seqs = unname(trimmed_seqs),
+    names = names(trimmed_seqs),
+    path = outfasta
+  )
+  return(0)
+}
+
+main()
diff --git a/README.md b/README.md
index 5afdaa1..a8f15e6 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,6 @@
 - install homebrew https://brew.sh
 - brew install llvm ninja meson libomp cmake brotli zlib xz gnu-time catch2
-- pip3 install mypy

 ## Installing dependencies on Ubuntu
diff --git a/bin/seqdb-update b/bin/seqdb-update
index db670c9..ab0fdde 100755
--- a/bin/seqdb-update
+++ b/bin/seqdb-update
@@ -39,8 +39,6 @@ def main(args: argparse.Namespace):
                 # only aligned are stored
                 ae_backend.raw_sequence.calculate_hash(sequence)
                 sequences_by_subtype.setdefault(sequence.type_subtype, []).append(sequence)
-            else:
-                print(f"{sequence.name:40s} {sequence.aa}")
         except Exception as err:
             print(f"> {err}: {sequence.name}\n {sequence.aa}")
         reader.context.messages_from_backend(sequence_messages)
diff --git a/cc/tree/newick.cc b/cc/tree/newick.cc
index 85ff59d..dd4f1f3 100644
--- a/cc/tree/newick.cc
+++ b/cc/tree/newick.cc
@@ -174,7 +174,7 @@ namespace ae::tree::newick {
        static constexpr auto whitespace = dsl::ascii::space / dsl::ascii::newline;
        static constexpr auto rule = dsl::p + dsl::semicolon + dsl::eof;

-       static constexpr auto max_recursion_depth = 1000;
+       static constexpr auto max_recursion_depth = 10000;

        static constexpr auto value = lexy::forward;
    };
diff --git a/cc/whocc/xlsx/sheet-extractor.cc b/cc/whocc/xlsx/sheet-extractor.cc
index 836de0f..9b9bfea 100644
--- a/cc/whocc/xlsx/sheet-extractor.cc
+++ b/cc/whocc/xlsx/sheet-extractor.cc
@@ -96,7 +96,7 @@ static const std::regex re_VIDRL_serum_name{"^(?:[AB]/)?([A-Z][A-Z ]+)/?([0-9]+)
 static const std::regex re_VIDRL_serum_id{"^(" pattern_VIDRL_serum_id "|" pattern_CRICK_serum_id ")$", regex_icase};
 static const std::regex re_VIDRL_serum_id_with_days{"^[AF][0-9][0-9][0-9][0-9]-[0-9]+D$", regex_icase};
-static const std::regex re_human_who_serum{R"(^\s*(.*(HUMAN|WHO|NORMAL)|GOAT|POST? VAX|VAX POOL|SERA POOL)\b)", regex_icase}; // "POST VAX": VIDRL H3 HI 2021, "HUMAN VAX": VIDRL H3 HI 2022
+static const std::regex re_human_who_serum{R"(^\s*(.*(HUMAN|WHO|NORMAL)|GOAT|POST? 
VAX|VAX POOL)\b)", regex_icase}; // "POST VAX": VIDRL H3 HI 2021, "HUMAN VAX": VIDRL H3 HI 2022 #pragma GCC diagnostic pop @@ -339,16 +339,13 @@ std::string ae::xlsx::v1::Extractor::titer(size_t ag_no, size_t sr_no) const void ae::xlsx::v1::Extractor::find_titers(warn_if_not_found winf) { std::vector>> rows; - AD_DEBUG("Sheet {} rows:{}", sheet().name(), sheet().number_of_rows()); + // AD_DEBUG("Sheet {}", sheet().name()); for (nrow_t row{0}; row < sheet().number_of_rows(); ++row) { auto titers = sheet().titer_range(row); - AD_DEBUG("Row:{} titers: ({} valid:{}) {}:{}", row, titers.length(), titers.valid(), titers.first, titers.second); adjust_titer_range(row, titers); - AD_DEBUG("Row:{} titers: ({} valid:{}) {}:{}", row, titers.length(), titers.valid(), titers.first, titers.second); if (titers.valid() && titers.length() > 2 && titers.first > ncol_t{0} && valid_titer_row(row, titers)) rows.emplace_back(row, std::move(titers)); } - AD_DEBUG("Rows with titers: {}", rows.size()); if (!ranges::all_of(rows, [&rows](const auto& en) { return en.second == rows[0].second; })) { fmt::memory_buffer report; // fmt::format(rows, "{}", "\n ")); diff --git a/proj/weekly-tree/export b/proj/weekly-tree/export index 580e904..bc61c17 100755 --- a/proj/weekly-tree/export +++ b/proj/weekly-tree/export @@ -2,23 +2,50 @@ import sys, os, argparse, traceback from pathlib import Path + sys.path[:0] = [str(Path(os.environ["AE_ROOT"], dir)) for dir in ["build", "py"]] import ae_backend from ae.sequences.source import fasta # ====================================================================== -sSubtypeForSeqdb = {"B": "B", "BVIC": "B", "BYAM": "B", "A(H1N1)": "A(H1N1)", "H1": "A(H1N1)", "1": "A(H1N1)", "A(H3N2)": "A(H3N2)", "H3": "A(H3N2)", "3": "A(H3N2)"} -sSubtypeNorm = {"BV": "BVIC", "BVIC": "BVIC", "BY": "BYAM", "BYAM": "BYAM", "A(H1N1)": "A(H1N1)", "H1": "A(H1N1)", "1": "A(H1N1)", "A(H3N2)": "A(H3N2)", "H3": "A(H3N2)", "3": "A(H3N2)"} +sSubtypeForSeqdb = { + "B": "B", + "BVIC": "B", + "BYAM": "B", + "A(H1N1)": "A(H1N1)", + "H1": "A(H1N1)", + "1": "A(H1N1)", + "A(H3N2)": "A(H3N2)", + "H3": "A(H3N2)", + "3": "A(H3N2)", +} +sSubtypeNorm = { + "BV": "BVIC", + "BVIC": "BVIC", + "BY": "BYAM", + "BYAM": "BYAM", + "A(H1N1)": "A(H1N1)", + "H1": "A(H1N1)", + "1": "A(H1N1)", + "A(H3N2)": "A(H3N2)", + "H3": "A(H3N2)", + "3": "A(H3N2)", +} sSubdirforSubtype = {"BVIC": "bvic", "BYAM": "byam", "A(H1N1)": "h1", "A(H3N2)": "h3"} + def main(args: argparse.Namespace): - subtypes = [sSubtypeNorm[st] for st in (args.subtype or ["BVIC", "BYAM", "H1", "H3"])] + subtypes = [ + sSubtypeNorm[st] for st in (args.subtype or ["BVIC", "BYAM", "H1", "H3"]) + ] for subtype in subtypes: sSubtypeToFunc[subtype](subtype=subtype, args=args) + # ---------------------------------------------------------------------- + def bvic(subtype: str, args: argparse.Namespace): outgroup = "VICTORIA/830/2013" selected = ( @@ -28,16 +55,116 @@ def bvic(subtype: str, args: argparse.Namespace): .lineage("VICTORIA") .exclude_with_issue() .filter_dates(first="2016-06") - .find_masters() # to be able to use .filter below - .exclude(lambda ref: ref.aa["165-"]) # exclude 6-del and MIE/1/2019 with garbage in the middle - .exclude_name([ - "ANHUI_JINGHU/63/2019", # garbage in 406-414 leading to a long branch (2021-0722) - "LISBOA/NISU182_17-18/2018", # yamagata, long branch 2021-1117 - # "ZAMBIA/503/2021", # potential long branch 2022-0401 - # 2023-01-28 potential long branch - "KRABI/FS002/2022", - ]) - .include_name(outgroup) # outgroup BRISBANE/60/2008, B/CHONGQING 
BANAN/1840/2017 (V1A) + .find_masters() # to be able to use .filter below + .exclude( + lambda ref: ref.aa["165-"] + ) # exclude 6-del and MIE/1/2019 with garbage in the middle + .exclude_name( + [ + "ANHUI_JINGHU/63/2019", # garbage in 406-414 leading to a long branch (2021-0722) + "LISBOA/NISU182_17-18/2018", # yamagata, long branch 2021-1117 + # "ZAMBIA/503/2021", # potential long branch 2022-0401 + # 2023-01-28 potential long branch + "KRABI/FS002/2022", + "CATALONIA/NSVH151199301/2023_OR_0ACCE47F", # XXXX 2023-0421 + "SOUTH_AFRICA/R10785/2022_OR_0EB8BBEA", # XXX 2023-0515 + "SOUTH_AFRICA/R10907/2022_OR_BA5B358C", + "SOUTH_AFRICA/R09298/2022_OR_1A5C630A", + "SOUTH_AFRICA/R10914/2022_OR_ADA6D682", + "SOUTH_AFRICA/R10638/2022_OR_5EAB4CBB", + "SOUTH_AFRICA/R11310/2022_OR_12125C84", + "SOUTH_AFRICA/R11575/2022_OR_FED84F4F", + "SOUTH_AFRICA/R08092/2022_OR_93DDF2B2", + # XXX 2023-0602 + "LA_REUNION/HCL023041635201/2022_103E7754", + "ROMANIA/550950/2023_MDCK-SIAT_E0DECCD2", + "SAINT_PETERSBURG/RII-10866S/2023_OR_477275F7", + # XXX2023-0629 + "ABU_DHABI/4030/2022_AEC2DE48", + "RIO_GRANDE_DO_SUL/28773/2022_455F4FFF", + # 2023-0731 + "MALAYSIA/IMR-SARI0563/2023_25FE3E0E", + # 2023-0811 Yam + "KENYA/KCH157321/2019_OR_308AC34A", + # 2023-0822 XXX + "ALAGOAS/5742/2023_OR_441065AD", + "ALAGOAS/4659/2023_OR_558AC3D6", + "PARANA/5012/2023_OR_D54659DC", + # 2023-1005 XXXX + "UNITED_KINGDOM/UO-476/2023", + # 2024-0130 + "SANTA_CATARINA/10748/2023_OR_7B192215", + # 2024-0703 + "MADRID/HULP-200063972/2024_C98ADFD1", # unknown at deletion positions 162-164 + "MADRID/HULP-200041842/2024_5D4B7141", # unknown at deletion positions 162-164 + # 2024-0718 + "AUSTRIA/MUW1681066/2024_OR_3D9C21FB", # XXXX + "ATHENS/ILI411/2024_OR_556F1400", # XXXX + # 2024-0411 + "PAIS_VASCO/1919/2024_OR_1D682639", + "GALICIA/52314290/2024_AF2005E4", + # 2025-2411 + "MADAGASCAR/3422/2024_OR_DF3D589F", + "MEXICO/2429277701/2024_OR_3BF2F243", + "GALICIA/GA-CHUAC-250/2025_OR_AC92C5F3", + # 2025-1202 + "PAKISTAN/NIC-JHL/2010_OR_EC56A7FD", + "PAKISTAN/NIC-ICT-284/2025_OR_8DAE4B6A", + "PAKISTAN/NIC-ICT-218/2025_OR_362FAB6C", + "PAKISTAN/NMC-12/2025_OR_7B217EEC", + "PAKISTAN/NIC-ICT-190/2025_OR_D4D6D601", + "ANDALUCIA/PMC-00528/2025_OR_549D6405", + # 2025-0313 + "GALICIA/52463655/2025_0971E6C6", + # 2025-0507 + "THUERINGEN/31/2025_OR_758A40BD", + "FRANCE/ARA-HCL025053223301/2025_OR_1C544F25", + "ABAY/NRL-2330/2025_C2E364F7", + "RIO_DE_JANEIRO/533/2025_OR_C04A6642", + "LA_REUNION/HCL023041635201/2022_OR_103E7754", + "ASTURIAS/232465739/2024_EA8BEFAB", + "UKRAINE/3471/2025_6E494813", + # 2025-0521 + "GALICIA/52475765/2025_F76C5888", + # 2025-0611 no exclusions + # 2025-0618 no exclusions + # 2025-0625 no exclusions + # 2025-0702 + "PHUKET/P1187/2025_5A09E7D5", + # 2025-0716 (omitted exclusions in error,later added in 0723) + # 2025-0723 + "ODISHA/1421/2023_6422458A", + # 2025-0729 no exclusions + # 2025-0818 + "INDIA/PUD-NIV25-1188/2025_MDCK3_935FB615", + # 2025-0827 no exclusions + # 2025-0910 no exclusions + # 2025-0917 + "INDIA/PUD-NIV25-1188/2025_MDCK3_935FB615", + "AVILA/227/2024_FLU0789_03C986A9", + "AVILA/43/2025_FLU0874_624F5516", + "SALAMANCA/214/2024_FLU0771_DF1DA731", + "LA_REUNION/CHU725061509401/2025_7A8734BB", + # 2025-1006 + "MEXICO/240450000409/2024_OR_1174529C", + # 2025-1028 no exclusions + # 2025-1112 + "SANTA_CATARINA/4844/2025_OR_8B4C49A1", + "TOULON/INF_20/2023_OR_06B04496", + "TOULON/INF_21/2023_OR_C17020D2", + "SANTA_CATARINA/4290/2025_OR_27BF4310", + "INDIA/RES-23-4646/2023_AB3871EB", + 
"INDIA/RES-23-1799/2023_840D4617", + "INDIA/RES-23-2404/2023_159F0FCE", + "INDIA/RES-23-2414/2023_A4EA11A3", + # 2025-1126 + "ODISHA/956/2023_9DD47677", + "B/LEE/40_6BCD57B7", + ] + ) + .include_name( + outgroup + ) # outgroup BRISBANE/60/2008, B/CHONGQING BANAN/1840/2017 (V1A) .remove_hash_duplicates() .replace_with_master() .check() @@ -47,8 +174,10 @@ def bvic(subtype: str, args: argparse.Namespace): report(subtype, selected, expected_nuc_size=1710, limit=0) export(subtype, selected) + # ---------------------------------------------------------------------- + def byam(subtype: str, args: argparse.Namespace): outgroup = "MINNESOTA/2/2014" selected = ( @@ -58,9 +187,15 @@ def byam(subtype: str, args: argparse.Namespace): .lineage("YAMAGATA") .exclude_with_issue() .filter_dates(first="2015") - .find_masters() # to be able to use .filter below - .include_name([outgroup]) # , "IDAHO/1/2014", "MASSACHUSETTS/7/2014"]) - .exclude_name([lin for lin in (line.strip() for line in Path("byam-exclude.txt").open()) if lin and lin[0] != "#"]) + .find_masters() # to be able to use .filter below + .include_name([outgroup]) # , "IDAHO/1/2014", "MASSACHUSETTS/7/2014"]) + .exclude_name( + [ + lin + for lin in (line.strip() for line in Path("byam-exclude.txt").open()) + if lin and lin[0] != "#" + ] + ) .remove_hash_duplicates() .replace_with_master() .sort("+date") @@ -69,15 +204,21 @@ def byam(subtype: str, args: argparse.Namespace): report(subtype, selected, expected_nuc_size=1710, limit=0) export(subtype, selected) + # ---------------------------------------------------------------------- + def h1(subtype: str, args: argparse.Namespace): h1_include_txt = Path("h1-include.txt") if not h1_include_txt.exists(): h1_include_txt = Path(sys.argv[0]).parent.joinpath(h1_include_txt.name) if not h1_include_txt.exists(): raise RuntimeError("> cannot find {h1_include_txt}") - to_include = [lin for lin in (line.strip() for line in h1_include_txt.open()) if lin and lin[0] != "#"] + to_include = [ + lin + for lin in (line.strip() for line in h1_include_txt.open()) + if lin and lin[0] != "#" + ] # to_include = to_include[:-500] outgroup = "SOUTH_AUSTRALIA/30/2013" selected = ( @@ -86,86 +227,294 @@ def h1(subtype: str, args: argparse.Namespace): .human() .exclude_with_issue() .filter_dates(first="2019") - .find_masters() # to be able to use .filter below - .include_name([ - outgroup, - "MICHIGAN/45/2015", - "BRAZIL/5026/2017", # # 183S, trying to show "the different 183P clades has S183P independently", inferred from 2018-0924-ssm tree - ] + to_include) # ~/AD/share/conf/h1.183S.2017-2018.seqids - .exclude_name([ - "SWINE/", - # 2020-08-10 Sarah: They're not sequenced by a CC or NIC, but by a company. The sequences don't have obvious errors, but are very different (obviously) from the rest. My feeling is they're either variant viruses (from swine) or a sequencing error. 
- "ANKARA/14015-724/2019", - "ANKARA/14017-004/2019", - "ANKARA/14015-736/2019", - # 2021-06-22 deletion at the end - # "BADEN-WURTTEMBERG/1/2020", - # long branch 2021-0622 - "DENMARK/1/2021", - "MANITOBA/2/2021", - # 2021-1117 long branches - "MECKLENBURG-VORPOMMERN/1/2021", - "MECKLENBURG-VORPOMMERN/1-A/2021", # refers to MECKLENBURG-VORPOMMERN/1/2021 - "NETHERLANDS/10370-1/2020", - "NETHERLANDS/10370-1A/2020", - "NETHERLANDS/10370-2/2020", # refers to NETHERLANDS/10370-1A/2020 - "NETHERLANDS/GENT-193/2019", - "HESSEN/47/2020", - "HUBEI WUJIAGANG/1324/2020", - "HEBEI HAIGANG/SWL1572/2019", - "SHANDONG/204/2021", - "YUNNAN MENGZI/1462/2020", - "NORTH CAROLINA/15/2020", - "WISCONSIN/3/2021", - "PARANA/10835/2021", - # 2021-1125 long branches - "GANSU_XIFENG/1194/2021", - "GANSU_XIFENG/1143/2021", - "SICHUAN/1208/2021", - "TIANJIN/30/2020", - "WISCONSIN/5/2021", - "WISCONSIN/4/2021", - "NORTH_DAKOTA/12226/2021", - # 2021-1217 potential long branch - "IOWA/1/2021", # --> SWINE/MISSOURI/A01104146/2020 (base), SWINE/MISSOURI/A02525160/2021 - "IOWA/6/2021", # --> "SWINE/IOWA/A02636087/2021", "SWINE/IOWA/A02636130/2021" - "NORTH DAKOTA/12226/2021", # --> "SWINE/IOWA/A02636087/2021", "SWINE/IOWA/A02636130/2021" - # 2021-1217 long branches - "NETHERLANDS/10370-1A/2020", - # 2022-0131 potential long branch - "DENMARK/36/2021", - # 2022-04-26 mess in 156-179 - "INDIA/NIV-NICOBAR91/2020_OR_01A87F80", - "INDIA/NIV-NICOBAR163/2020_OR_823B2A0F", - "INDIA/NIV-NICOBAR106/2020_OR_B81F1DC2", - "INDIA/NIV-NICOBAR164/2020_OR_37A11133", - # 2022-10-07 potential long branch - "PARANA/20675/2022_OR_3D8D671C", - # 2022-12-10 potential long branch - "DAKAR/3124/2021_279E04A6", - # 2022-12-23 potential long branch - "NAVARRA/4050/2022_OR_BBC31B3A", - # 2023-01-13 potential long branch - "SAO_PAULO/356765942-IAL/2022_OR_71BA8B9D", - # 2023-01-28 potential long branches - "NAVARRA/4050/2022_OR_BBC31B3A", - "DAKAR/3124/2021_279E04A6 pdm09-like", - "PAKISTAN/GIHSN-GB-445/2022_OR_A20D55B6", - "PAKISTAN/GIHSN-ICT-152/2023_OR_572EBEB8", - "PAKISTAN/GIHSN-PB-15/2023_OR_484A375D", - "SAO_PAULO/356765942-IAL/2022_OR_71BA8B9D", - "CATALONIA/NSVH101996495/2023_OR_2E610047", - ]) + .find_masters() # to be able to use .filter below + .include_name( + [ + outgroup, + "MICHIGAN/45/2015", + "BRAZIL/5026/2017", # # 183S, trying to show "the different 183P clades has S183P independently", inferred from 2018-0924-ssm tree + ] + + to_include + ) # ~/AD/share/conf/h1.183S.2017-2018.seqids + .exclude_name( + [ + "SWINE/", + # 2020-08-10 Sarah: They're not sequenced by a CC or NIC, but by a company. The sequences don't have obvious errors, but are very different (obviously) from the rest. My feeling is they're either variant viruses (from swine) or a sequencing error. 
+ "ANKARA/14015-724/2019", + "ANKARA/14017-004/2019", + "ANKARA/14015-736/2019", + # 2021-06-22 deletion at the end + # "BADEN-WURTTEMBERG/1/2020", + # long branch 2021-0622 + "DENMARK/1/2021", + "MANITOBA/2/2021", + # 2021-1117 long branches + "MECKLENBURG-VORPOMMERN/1/2021", + "MECKLENBURG-VORPOMMERN/1-A/2021", # refers to MECKLENBURG-VORPOMMERN/1/2021 + "NETHERLANDS/10370-1/2020", + "NETHERLANDS/10370-1A/2020", + "NETHERLANDS/10370-2/2020", # refers to NETHERLANDS/10370-1A/2020 + "NETHERLANDS/GENT-193/2019", + "HESSEN/47/2020", + "HUBEI WUJIAGANG/1324/2020", + "HEBEI HAIGANG/SWL1572/2019", + "SHANDONG/204/2021", + "YUNNAN MENGZI/1462/2020", + "NORTH CAROLINA/15/2020", + "WISCONSIN/3/2021", + "PARANA/10835/2021", + # 2021-1125 long branches + "GANSU_XIFENG/1194/2021", + "GANSU_XIFENG/1143/2021", + "SICHUAN/1208/2021", + "TIANJIN/30/2020", + "WISCONSIN/5/2021", + "WISCONSIN/4/2021", + "NORTH_DAKOTA/12226/2021", + # 2021-1217 potential long branch + "IOWA/1/2021", # --> SWINE/MISSOURI/A01104146/2020 (base), SWINE/MISSOURI/A02525160/2021 + "IOWA/6/2021", # --> "SWINE/IOWA/A02636087/2021", "SWINE/IOWA/A02636130/2021" + "NORTH DAKOTA/12226/2021", # --> "SWINE/IOWA/A02636087/2021", "SWINE/IOWA/A02636130/2021" + # 2021-1217 long branches + "NETHERLANDS/10370-1A/2020", + # 2022-0131 potential long branch + "DENMARK/36/2021", + # 2022-04-26 mess in 156-179 + "INDIA/NIV-NICOBAR91/2020_OR_01A87F80", + "INDIA/NIV-NICOBAR163/2020_OR_823B2A0F", + "INDIA/NIV-NICOBAR106/2020_OR_B81F1DC2", + "INDIA/NIV-NICOBAR164/2020_OR_37A11133", + # 2022-10-07 potential long branch + "PARANA/20675/2022_OR_3D8D671C", + # 2022-12-10 potential long branch + "DAKAR/3124/2021_279E04A6", + # 2022-12-23 potential long branch + "NAVARRA/4050/2022_OR_BBC31B3A", + # 2023-01-13 potential long branch + "SAO_PAULO/356765942-IAL/2022_OR_71BA8B9D", + # 2023-01-28 potential long branches + "NAVARRA/4050/2022_OR_BBC31B3A", + "DAKAR/3124/2021_279E04A6 pdm09-like", + "PAKISTAN/GIHSN-GB-445/2022_OR_A20D55B6", + "PAKISTAN/GIHSN-ICT-152/2023_OR_572EBEB8", + "PAKISTAN/GIHSN-PB-15/2023_OR_484A375D", + "SAO_PAULO/356765942-IAL/2022_OR_71BA8B9D", + "CATALONIA/NSVH101996495/2023_OR_2E610047", + # 2023-02-04 potential long branch + "SWITZERLAND/69094/2022_OR_B3C15C9B", + # 2023-0421 potential long branches + "CATALONIA/NSVH510598655/2022_OR_57BCC58B", + "PAKISTAN/1465/2023_OR_1B7B0A1A", + "BADEN-WURTTEMBERG/159/2022_OR_6AB83E7E", + "PARANA/44706/2021_OR_25BFEB40", + # XXX 2023-0515 + "A/PAKISTAN/GIHSN/ICT/1838/2023_0867EDC7", + "A/PAKISTAN/GIHSN/ICT/1634/2023_OR_3D55578F", + "A/PAKISTAN/GIHSN/ICT/1798/2023_OR_C271B3C9", + "A/PAKISTAN/GIHSN/ICT/1682/2023_OR_FB7A0386", + # XXX 2023-0602 + "PAKISTAN/GIHSN-ICT2035/2023_OR_4E73FFF6", + # XXX 2023-0731 + "CYCLADES/215/2022_D164055B", + # long branch & XXXX 2023-0811 + "GREECE/SRIHER-001/2022_OR_E68E256A", + "SAUDI_ARABIA/40/2020_OR_9B1E74A4", + "SAUDI_ARABIA/39/2020_OR_DAE6A100", + # 2023-0822 XXXX + "PARANA/6405/2023_OR_9ACD4010", + "BAHIA/5785/2023_OR_CBFA332A", + # 2023-0903 very long branch + "NETHERLANDS/10534/2023_OR_6221D5BF", + # 2023-09-08 long branch in tree + "BADAJOZ/18570374/2023_58158069", + # 2023-09-15 XXXX + "ALGIERS/251/2023_OR_1A27768", + # 2023-10-23 XXX & odd start for SAO_PAULO + "SAO_PAULO/CEVI_2300134/2023_OR_BB923F69", + "CAMBODIA/KVH230151/2023_OR_1F665D9C", + "CAMBODIA/AHC230229/2023_OR_3241A3FC", + # 2023-20-28 + "BULGARIA/725/2023_8A6AF67F", # nonsense 1-29 + "CAMBODIA/CCH230213/2023_OR_BE937275", # XXXX + # 2023-12-04 + "BANGKOK/P5113/2023_OR_CB7624FD", # long branch + # 
2024-01-01 + "SWITZERLAND/114/2023_MDCK1_A2D22DAA", # long branch + # 2024-01-30 + "CATALONIA/NSAV198289092/2023_OR_291AD7EDPARANA/13897/2023_OR_0A657176", + "BELGIUM/H00082/2023_56A80548", + "CACERES/59816382/2023_A1324011", + "SENEGAL/2098/2023_OR_2EC7BF97", + # 2024-0703 + "GALICIA/GA-CHUAC-98/2024_OR_988888E6", # 29 nuc differences, 19 nuc differences + "MEXICO/2271/2021_338E9C30", # run of differences, 35-38, 40-42 + "ALAGOAS/CEVI_2200198/2021_OR_0811FCC2", # 6 X + "MADRID/1832324191549/2024_OR_988BC424", # 9 X + "HUMAN/KENYA/5866/2023_3F1D8A0B", # 9X + # 2024-0718 + "SAO_PAULO/CEVI_24000816/2024_OR_BC9F903C", # XXXX + "SAO_PAULO/CEVI_24000889/2024_OR_C69399E2", # dels + # 2024-0727 + "LUXEMBOURG/LNS3963248/2024_04C1C479", # XXXX + "COSTA_RICA/INC-0552-822365/2024_OR_9E1DE43D", # XXXX + # 2024-08-06 + "CHANTHABURI/P2877/2024_OR_5B993EF6", # XXXXX + "SRI_LANKA/R4_BC75/2024_50C5FC20" # XXXXX + # 2024-0830 XXXX & many changes + "OHIO/251/2024_OR_30CB2DAF", + "HANOI/PF324163/2024_OR_402B095E", + "SOUTH_AFRICA/PET32788/2024_OR_47322CBB", + # 2024-0902 long branch + "YAOUNDE/24V-3413/2024_8196548B", + # 2024-1004 + "PHUKET/P3423/2024_OR_5E3C7D19", # XXXX + # 2024-1025 + "PARANA/10835/2021_OR_8025A28A", + "TOLEDO/PARANA_10835/2021_OR_8025A28A", + "ENGLAND/3640434/2024_OR_A126DDAB", + "CATALONIA/NSAV198309324/2024_OR_29D1522E", + # 2024-1126 + "OREGON/FLU-OHSU-242090049/2024_05BD35BD", + "OREGON/FLU-OHSU-242090044/2024_DEC6E084", + # 2024-1209 + "NAKHON_PHANOM/F2539/2024_OR_276AD388", + # 2025-0901 + "CAMBODIA/KPH240133/2024_OR_CD9DB776", + "ENGLAND/4820116/2024_OR_F5C11575", + # 2025-2401 + "GALICIA/GA-CHUAC-172/2024_OR_000994B0", + "ROMANIA/2920/2024_F73C0E92", + # 2025-1202 + "BALTIMORE/JH-173/2025_A2FCE291", + "CROATIA/HZJZ-8235/2025_170E7DD7", + "ENGLAND/340238/2025_OR_EA36F218", + "ENGLAND/121421/2024_OR_2094499C", + "ANDALUCIA/PMC-00542/2025_OR_BD2FE74E", + "PAKISTAN/NIC-ICT-576/2025_OR_6924EA8E", + "ANDALUCIA/PMC-00549/2025_OR_A6464885", + "CASTELLON/VAHNSI-GIHSN-02_0386/2025_OR_2E26A6D3", + # 2025-0313 + "QUEENSLAND/IN000908/2025_91CEAA08", + "FRANCE/HDF-RELAB-IPP01090/2025_OR_096FF1D6", + "CATALONIA/NSVH171785721/2025_OR_C896185E", + "KAMCHATKA/205-20V/2025_OR_68CABAFF", + # 2025-0507 + "ENGLAND/5100240/2024_OR_EB872B9D", + "CAMBODIA/CCH250029/2025_OR_A4964B9E", + "ASTURIAS/232523061/2025_58877E98", + "PAKISTAN/NIC_SD193/2025_OR_3A27DCA6", + "ESPIRITO_SANTO/475/2025_OR_4FB190FF", + "PAKISTAN/NIC_SD184/2025_OR_66AF2F25", + "PAKISTAN/NIC_JHL115/2025_OR_53A17665", + "GALICIA/22647653/2025_DE4A5E72", + "IRELAND/4321/2025_8EB775D5", + "ENGLAND/640250/2025_OR_01187600", + "PAKISTAN/NIC_NIH749/2025_OR_40914714", + "BELGIUM/S4050/2025_OR_E331B39E", + "GALICIA/GA-CHUAC-314/2025_OR_59908BA2", + "MURCIA/528/2025_OR_E7444F60", + "OREL/SWINE-RII-WD2678S/2025_OR_6C9DA738", + "OREL/SWINE-RII-WD2681S/2025_OR_B004CABD", + "VERMONT/UVM-0478/2022_OR_DFD37A87", + "VERMONT/UVM-1652/2022_OR_42E6DF65", + "FRANCE/NOR-IPP04189/2025_OR_88070E22", # caused tree failure + # 2025-0716 (omitted exclusions in error, added 29/07, some added 0723 PR) + "CAMBODIA/SVH250268/2025_OR_238D9DD5", + "CAMBODIA/IAHC250126/2025_OR_FDAC2F6D", + "CAMBODIA/IKCM250040/2025_OR_7BD02395", + "CAMBODIA/CCH250037/2025_OR_347A5F98", + # 2025-0723 + "NIZHNIY_NOVGOROD/RII-MH239577S/2025_OR_B6AF35B5", + "CAMBODIA/KVH250142/2025_OR_3F8690DC", + "CAMBODIA/IAHC250133/2025_OR_C34D3AC3", + "SURGUT/RII-MH237071S/2025_OR_3A47CC28", + "MOROCCO/1168/2025_OR_910D767C", + # 2025-0729 + "ARKHANGELSK/RII-MH246940S/2025_OR_78BF9CC7", + # 2025-0818 + 
"CATALONIA/NSVH161521968/2025_OR_A0FA1338", + "CAMBODIA/NPH250451/2025_OR_3F664AAE", + "CAMBODIA/IAHC250237/2025_OR_E4433FD6", + "VALENCIA/VAHNSI-GIHSN-09_0742/2025_OR_30A76211", + "SAO_PAULO/CEVI_2500217/2025_OR_A0D5251B", + "SAO_PAULO/CEVI_2500207/2025_OR_3C666E5E", + "HONG_KONG/F40/2024_OR_94A307F6", + # 2025-0827 + "KRASNOGORSK/RII-MH248987S/2025_OR_CA8C4660", + # 2025-0910 no exclusions + # 2025-0917 + "CATALONIA/NSVH161521968/2025_OR_A0FA1338", + "CAMBODIA/IAHC250237/2025_OR_E4433FD6", + "CAMBODIA/NPH250451/2025_OR_3F664AAE", + "VALENCIA/VAHNSI-GIHSN-09_0742/2025_OR_30A76211", + "SAO_PAULO/CEVI_2500207/2025_OR_3C666E5E", + "KRASNOGORSK/RII-MH248987S/2025_OR_CA8C4660", + "HONG_KONG/F40/2024_OR_94A307F6", + # 2025-1006 no exclusions + # 2025-1028 + "SENEGAL/IPD-2503/2025_OR_20CCF2D9", + "POLAND/D7325_10/2023_763600E9", + "NETHERLANDS/D6904_36/2022_66C9DF52", + "POLAND/D7325_7/2023_CE4D3423", + "NETHERLANDS/D7061_16/2022_8D08423F", + "SPAIN/D7228_1/2022_F78E9BF7", + "AUSTRIA/D7225_20/2023_12AC9B5C", + "SPAIN/D7228_40/2023_36844405", + "BELGIUM/D6840_21/2022_32C5D497", + "SPAIN/D7228_50/2023_99D2EC08", + "ITALY/D7434_7/2023_BC9D4B6B", + "PORTUGAL/D7272_2/2023_0ED2CAE8", + "GERMANY/D7225_5/2023_13A6321B", + "GERMANY/D7225_17/2023_E6C859C1", + "POLAND/D7325_11/2023_906FE020", + "NETHERLANDS/D6840_51/2022_E7F7E7FB", + "GERMANY/D7061_22/2023_33D65F89", + "POLAND/D6904_24/2022_AF273C3D", + "POLAND/D7325_23/2023_36B8A0E4", + "NETHERLANDS/D7061_27/2022_DCE07BA1", + "POLAND/D6904_5/2022_240EA095", + "POLAND/D6840_59/2022_95E58799", + "BELGIUM/D7157_61/2023_6A0D50EA", + "GERMANY/D7061_33/2022_6DD93351", + "SLOVENIA/D7378_1_1/2023_473636B1", + "GERMANY/D7061_8/2022_5C22171F", + "GERMANY/D7225_1/2023_6BCBB1AC", + "GERMANY/D7061_11/2023_EA3CB29E", + "GERMANY/D7061_9/2023_D612A4DA", + "BULGARIA/D6904_28/2022_74CF41D4", + "BULGARIA/D6904_25/2022_DA88ADA9", + "SPAIN/D7228_24/2023_F14A6077", + # 2025-1112 + "RIYADH/63/2021_OR_677764E9", + "RIYADH/65/2022_OR_B365D7CC", + "RIYADH/19/2021_OR_9E0E1562", + "RIYADH/88/2022_OR_8E98C98E", + "RIYADH/100/2022_OR_8B809E4C", + "RIYADH/111/2023_OR_886090A3", + "RIYADH/120/2023_OR_EA0E7626", + "RIYADH/49/2022_OR_DB27594B", + "RIYADH/88/2023_OR_E11E47BF", + "RIYADH/66/2022_OR_03C7C7E0", + "TOULON/INF_4/2023_OR_A4196D00", + # 2025-1126 + "CAMBODIA/KPH250113/2025_OR_EABF2384", + "CAMBODIA/IAHC250287/2025_OR_3BF3D0EB", + "CAMBODIA/IMDK250044/2025_OR_037B06A4", + "ASTURIAS/232557424/2025_A6C56596", + ] + ) .remove_hash_duplicates() .replace_with_master() .sort("+date") .move_name_to_beginning(outgroup) ) - report(subtype, selected, expected_nuc_size=1647, limit=0) # aa 549 + report(subtype, selected, expected_nuc_size=1647, limit=0) # aa 549 export(subtype, selected) + # ---------------------------------------------------------------------- + def h3(subtype: str, args: argparse.Namespace): outgroup = "PERTH/16/2009" selected = ( @@ -174,141 +523,321 @@ def h3(subtype: str, args: argparse.Namespace): .human() .exclude_with_issue() .filter_dates(first="2018-03") - .find_masters() # to be able to use .filter below - .include_name([ - outgroup, - "VICTORIA/361/2011", # 2011-10-24 3C.3 - # [2020-09-07] Nearest common ancestor of 2a and 3a in /syn/eu/ac/results/eu/2019-0417-h3-trees/cdc-usa-2009-2019-250-per-year-1.rough-29532.tree.pdf https://notebooks.antigenic-cartography.org/eu/results/eu/2019-0417-h3-trees/ - "MARYLAND/30/2012", # 2012-07-26 3C.3 - # [2020-08-17] Derek thinks that the root (A/STOCKHOLM/6/2014) is not old enough - "TEXAS/50/2012", # 2012-04-15 3C.3 root in 
MELB tree 2020 - "VERMONT/6/2012", - "STOCKHOLM/6/2014", # 2014-02-06 3a - # [2020-02-07] intermediate strains from nextflu https://nextstrain.org/flu/seasonal/h3n2/ha/2y to keep 3a at the top - "SWITZERLAND/9715293/2013", # 2013-12-06 3a - "NORWAY/466/2014", # 2014-02-03 3a - "SOUTH_AUSTRALIA/55/2014", # 2014-06-29 3a - "TASMANIA/11/2014", # 2014-03-16 3a - "KOBE/63/2014", # 2014-05-21 3a - "PERU/27/2015", # 2015-04-13 3a - "NEVADA/22/2016", # 2016-03-05 3a - "IDAHO/33/2016", # 2016-06-08 3a - "TEXAS/88/2016", # 2016-02-25 3a - "TEXAS/71/2017", # 2017-03-18 3a - "BRAZIL/7331/2018", # 2018-07-09 3a - "KANSAS/14/2017", # 2017-12-14 3a, to have serum circles in the sig page - "HONG_KONG/4801/2014", # 2014-02-26 2a - "HAWAII/47/2014", # 2014-07-18 2a - "NORTH_CAROLINA/4/2017", # 2017-01-26 2a2 - "NORTH_CAROLINA/4/2016", # 2016-01-14 2a1 - "ANTANANARIVO/1067/2016", # 2016-04-06 2a1 - "HONG_KONG/2286/2017", # 2017-05-23 2a1b 135K - "WISCONSIN/327/2017", # 2017-09-22 2a1b 135K - "ABU_DHABI/240/2018", # 2018-01-01 2a1b 135K - "JAMAICA/1447/2018", # 2018-02-19 2a1b 131K - - # Strains before and after T135N substitution to have a better 135N branch placement - # Sarah 2021-02-08 17:05 - "WISCONSIN/85/2016", - "SRI_LANKA/56/2017", - "SOUTH_CAROLINA/4/2017", - "YOKOHAMA/145/2017", - "INDIA/9930/2017", - "HONG_KONG/3118/2017", - "HAWAII/47/2014", - "NIIGATA-C/43/2015", - "DAKAR/17/2016", - # "CAMEROON/16V-9267/2016", # excluded, truncated sequence - "LAOS/3008/2016", - "YUNNAN_LINXIANG/1718/2016", - "HONG_KONG/2302/2016", - "ONTARIO/RV2414/2015", - "ONTARIO/RV2414/2015", - "HONG_KONG/2286/2017", - "HONG_KONG/2286/2017", - "HONG_KONG/2286/2017", - "HONG_KONG/2286/2017", - "HONG_KONG/2286/2017", - ]) - .exclude_name([ - "SWINE/", - # long branch (2021-0622) - "WISCONSIN/1/2021", - # long branch (2021-1117) - "HAWAII/28/2020", - "SOUTH_AUSTRALIA/85/2018", - "SOUTH_AUSTRALIA/1/2021", - "MANITOBA/3/2021", - "GUANGXI_GANGBEI/190/2019", - # 2021-1217 potential long branch - "OHIO/6/2021_ISOLATE", - # 2022-0204 potential long branch - "FRANCE/ARA-HCL021219268801/2021", - "FRANCE/ARA-HCL021220304101/2021", - # 2022-0209 long branch, eight X, R in nuc - "ONTARIO/RV2002/2019_P1_0F1E8F1E", - # 2022-0415 potential long branch - "ACRE/181823-IEC/2021", - # 2022-0819 potential long branch - "CATALONIA/NSVH101883267/2022_OR_F79C7813", - # 2022-0825 long branch in the tree - "INDIANA/27/2018_SIAT1_26C3A2A4", - "SWINE/KENTUCKY/18TOSU1939/2018_26C3A2A4", - "SWINE/ILLINOIS/18TOSU1411/2018_26C3A2A4", - # 2022-0826 potential long branch - "FIJI/99/2022_OR_80B95BF8", - "FIJI/102/2022_OR_6AEB79D8", - "WEST_VIRGINIA/25/2022_SIAT1_3E592413", - "WEST_VIRGINIA/26/2022_SIAT1_3E592413", - "WEST_VIRGINIA/24/2022_SIAT1_2FDA4DA3", - # 2022-0902 potential long branch - "FRANCE/ARA-HCL022060609601/2022_65F4520B", - # 2022-10-07 potential long branch - "CATALONIA/NSVH101908168/2022_OR_D5C0176E", - # 2022-10-28 potential long branch - "MICHIGAN/48/2022_OR_910196F9", - # 2022-11-04 potential long branch - "NEW_MEXICO/19/2022_IRX_4BA11056", - # 2022-11-11 potential long branches - "CATALONIA/NSVH534131326/2022_OR_4AC47F3F", - "A/ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_MIXED_ECE1_DCC5FAD7", - "ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_ECE1_DCC5FAD7", - # 2022-11-26 potential long branches - "CATALONIA/NSVH101954411/2022_OR_7A633967", - "CATALONIA/NSVH510598655/2022_OR_57BCC58B", - # 2022-12-02 potential long branches - "MISSOURI/19/2022_OR_837C934E", - # 2022-12-10 potential long branches - "CATALONIA/NSVH560028269/2022_OR_9C189CFA", # 
159X 160X 162X 163X 164X - "ATHENS/355/2022_D9D4A86A", # 38X 40X 41X 42X 59X 67X 81X 128X - "MICHIGAN/48/2022_OR_910196F9", - "MICHIGAN/UOM10047284024/2022_OR_910196F9", - # 2023-01-08 potential long branches - "BRITISH_COLUMBIA/PHL-480/2022_OR_EAAE7C01", - # 2023-01-13 potential long branch - "GALICIA/22034482/2022_2EE89148", - "GALICIA/22018524/2022_1EE46082", - "GALICIA/22033804/2022_0EE699D6", - "GALICIA/21987208/2022_F49E5748", - "GALICIA/22036079/2022_430E996A", - "GALICIA/22018743/2022_0116B017", - "GALICIA/22011636/2022_3D9F40AD", - # 2023-01-20 potential long branch - "SINGAPORE/GP9117/2022_OR_69761A51", - "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", - # 2023-01-28 potential long branches - "MICHIGAN/48/2022_OR_910196F9", - "MACEDONIA/IPH-MKD-59729/2021_OR_837D0C9D", - "KHON_KAEN/FS005/2022_OR_1A5637D8", - "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", - "ATHENS/355/2022_D9D4A86", - "CATALONIA/NSVH101994299/2023_OR_13A43DA9", - "MACEDONIA/IPH-MKD-HD/2021_OR_B52D1807", - "SOUTH_AFRICA/R07603/2022_OR_7937FE46", - "SOUTH_AFRICA/R05772/2022_OR_552B5F5E", - "GALICIA/22034482/2022_2EE89148", - "CATALONIA/NSVH560028269/2022_OR_9C189CFA", - ]) + .find_masters() # to be able to use .filter below + .include_name( + [ + outgroup, + "VICTORIA/361/2011", # 2011-10-24 3C.3 + # [2020-09-07] Nearest common ancestor of 2a and 3a in /syn/eu/ac/results/eu/2019-0417-h3-trees/cdc-usa-2009-2019-250-per-year-1.rough-29532.tree.pdf https://notebooks.antigenic-cartography.org/eu/results/eu/2019-0417-h3-trees/ + "MARYLAND/30/2012", # 2012-07-26 3C.3 + # [2020-08-17] Derek thinks that the root (A/STOCKHOLM/6/2014) is not old enough + "TEXAS/50/2012", # 2012-04-15 3C.3 root in MELB tree 2020 + "VERMONT/6/2012", + "STOCKHOLM/6/2014", # 2014-02-06 3a + # [2020-02-07] intermediate strains from nextflu https://nextstrain.org/flu/seasonal/h3n2/ha/2y to keep 3a at the top + "SWITZERLAND/9715293/2013", # 2013-12-06 3a + "NORWAY/466/2014", # 2014-02-03 3a + "SOUTH_AUSTRALIA/55/2014", # 2014-06-29 3a + "TASMANIA/11/2014", # 2014-03-16 3a + "KOBE/63/2014", # 2014-05-21 3a + "PERU/27/2015", # 2015-04-13 3a + "NEVADA/22/2016", # 2016-03-05 3a + "IDAHO/33/2016", # 2016-06-08 3a + "TEXAS/88/2016", # 2016-02-25 3a + "TEXAS/71/2017", # 2017-03-18 3a + "BRAZIL/7331/2018", # 2018-07-09 3a + "KANSAS/14/2017", # 2017-12-14 3a, to have serum circles in the sig page + "HONG_KONG/4801/2014", # 2014-02-26 2a + "HAWAII/47/2014", # 2014-07-18 2a + "NORTH_CAROLINA/4/2017", # 2017-01-26 2a2 + "NORTH_CAROLINA/4/2016", # 2016-01-14 2a1 + "ANTANANARIVO/1067/2016", # 2016-04-06 2a1 + "HONG_KONG/2286/2017", # 2017-05-23 2a1b 135K + "WISCONSIN/327/2017", # 2017-09-22 2a1b 135K + "ABU_DHABI/240/2018", # 2018-01-01 2a1b 135K + "JAMAICA/1447/2018", # 2018-02-19 2a1b 131K + # Strains before and after T135N substitution to have a better 135N branch placement + # Sarah 2021-02-08 17:05 + "WISCONSIN/85/2016", + "SRI_LANKA/56/2017", + "SOUTH_CAROLINA/4/2017", + "YOKOHAMA/145/2017", + "INDIA/9930/2017", + "HONG_KONG/3118/2017", + "HAWAII/47/2014", + "NIIGATA-C/43/2015", + "DAKAR/17/2016", + # "CAMEROON/16V-9267/2016", # excluded, truncated sequence + "LAOS/3008/2016", + "YUNNAN_LINXIANG/1718/2016", + "HONG_KONG/2302/2016", + "ONTARIO/RV2414/2015", + "ONTARIO/RV2414/2015", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + ] + ) + .exclude_name( + [ + "SWINE/", + # long branch (2021-0622) + "WISCONSIN/1/2021", + # long branch (2021-1117) + "HAWAII/28/2020", + 
"SOUTH_AUSTRALIA/85/2018", + "SOUTH_AUSTRALIA/1/2021", + "MANITOBA/3/2021", + "GUANGXI_GANGBEI/190/2019", + # 2021-1217 potential long branch + "OHIO/6/2021_ISOLATE", + # 2022-0204 potential long branch + "FRANCE/ARA-HCL021219268801/2021", + "FRANCE/ARA-HCL021220304101/2021", + # 2022-0209 long branch, eight X, R in nuc + "ONTARIO/RV2002/2019_P1_0F1E8F1E", + # 2022-0415 potential long branch + "ACRE/181823-IEC/2021", + # 2022-0819 potential long branch + "CATALONIA/NSVH101883267/2022_OR_F79C7813", + # 2022-0825 long branch in the tree + "INDIANA/27/2018_SIAT1_26C3A2A4", + "SWINE/KENTUCKY/18TOSU1939/2018_26C3A2A4", + "SWINE/ILLINOIS/18TOSU1411/2018_26C3A2A4", + # 2022-0826 potential long branch + "FIJI/99/2022_OR_80B95BF8", + "FIJI/102/2022_OR_6AEB79D8", + "WEST_VIRGINIA/25/2022_SIAT1_3E592413", + "WEST_VIRGINIA/26/2022_SIAT1_3E592413", + "WEST_VIRGINIA/24/2022_SIAT1_2FDA4DA3", + # 2022-0902 potential long branch + "FRANCE/ARA-HCL022060609601/2022_65F4520B", + # 2022-10-07 potential long branch + "CATALONIA/NSVH101908168/2022_OR_D5C0176E", + # 2022-10-28 potential long branch + "MICHIGAN/48/2022_OR_910196F9", + # 2022-11-04 potential long branch + "NEW_MEXICO/19/2022_IRX_4BA11056", + # 2022-11-11 potential long branches + "CATALONIA/NSVH534131326/2022_OR_4AC47F3F", + "A/ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_MIXED_ECE1_DCC5FAD7", + "ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_ECE1_DCC5FAD7", + # 2022-11-26 potential long branches + "CATALONIA/NSVH101954411/2022_OR_7A633967", + "CATALONIA/NSVH510598655/2022_OR_57BCC58B", + # 2022-12-02 potential long branches + "MISSOURI/19/2022_OR_837C934E", + # 2022-12-10 potential long branches + "CATALONIA/NSVH560028269/2022_OR_9C189CFA", # 159X 160X 162X 163X 164X + "ATHENS/355/2022_D9D4A86A", # 38X 40X 41X 42X 59X 67X 81X 128X + "MICHIGAN/48/2022_OR_910196F9", + "MICHIGAN/UOM10047284024/2022_OR_910196F9", + # 2023-01-08 potential long branches + "BRITISH_COLUMBIA/PHL-480/2022_OR_EAAE7C01", + # 2023-01-13 potential long branch + "GALICIA/22034482/2022_2EE89148", + "GALICIA/22018524/2022_1EE46082", + "GALICIA/22033804/2022_0EE699D6", + "GALICIA/21987208/2022_F49E5748", + "GALICIA/22036079/2022_430E996A", + "GALICIA/22018743/2022_0116B017", + "GALICIA/22011636/2022_3D9F40AD", + # 2023-01-20 potential long branch + "SINGAPORE/GP9117/2022_OR_69761A51", + "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", + # 2023-01-28 potential long branches + "MICHIGAN/48/2022_OR_910196F9", + "MACEDONIA/IPH-MKD-59729/2021_OR_837D0C9D", + "KHON_KAEN/FS005/2022_OR_1A5637D8", + "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", + "ATHENS/355/2022_D9D4A86", + "CATALONIA/NSVH101994299/2023_OR_13A43DA9", + "MACEDONIA/IPH-MKD-HD/2021_OR_B52D1807", + "SOUTH_AFRICA/R07603/2022_OR_7937FE46", + "SOUTH_AFRICA/R05772/2022_OR_552B5F5E", + "GALICIA/22034482/2022_2EE89148", + "CATALONIA/NSVH560028269/2022_OR_9C189CFA", + # 2023-02-04 potential long branches + "MANITOBA/RV00728/2022_P1_5236154F", + "ONTARIO/198/2022_OR_D1EC3B40", + # 2023-04-21 potential long branches + "ROMANIA/493/2023_612606B9", + "PAKISTAN/1140/2023_OR_3D2D111B", + "CATALONIA/NSVH102045605/2023_OR_7490FFA8", + # XXX & long branch 2023-0731 + "BAHIA/PVM105707/2021_OR_BCE07E46", + "SOUTH_AFRICA/K056301/2023_30E0E639", + # XXXXX 2023-0811 CORRECTED 2023-1023 + "BAHIA/292038965/2021_70230607", + "BAHIA/292104308/2022_D4526D62", + # XXXXX 2023-0829 CORRECTED 2023-1023 + "SOUTH_AFRICA/PET28428/2023_7572E223", + "SOUTH_AFRICA/PET28193/2023_BF46BBB4", + "NORWAY/8084/2023_OR_4548A267", + # 2023-09-08 potential long branch ?why + 
"SOUTH_AFRICA/K056301/2023_CC1FE952" + # 2023-10-23 XXX + "PARA/CEVI_2200263/2021_OR_6D24213E", + "SAO_PAULO/CEVI_2200855/2022_OR_D4E825BF", + "SAO_PAULO/CEVI_2200871/2022_OR_280FCE18", + # 2023-10-28 + "SOUTH_AFRICA/K056301/2023_CC1FE952", # very divergent + # 2023-12-04 XXXX + "OMAN/CPHL_52323056/2023_OR_297A7E60", + "LUSAKA/1-NIC-332/2023_OR_BA018CE9", + "NORWAY/11072/2023_OR_EB1EA580", + # 2024-01-01 + "SAINT_PETERSBURG/RII-MH151473S/2023_OR_D20D279D", # XX + "SAINT_PETERSBURG/RII-4937S/2023_OR_95887A8C", # XX + "CANADA/NOVASCOTIA-32036/2022_A90EF453", # first 7 aa + "BRITISH_COLUMBIA/PHL-602/2022_OR_A1B8E845", # XX + # 2024-01-22 for 2024-01-16 file + "INDONESIA/BIOKES-IBKL1127/2023_OR_791937E7", # long branch + "SAO_PAULO/CEVI_2201055/2022_OR_BF9D3752", # XX + "ROMANIA/3227/2023_E20D207E", # XX + "SAO_PAULO/CEVI_2200903/2022_OR_BB30A339", # XX + "CACERES/50644480/2023_361FA786", # long branch + "BELGIUM/S1112/2023_OR_D7ABA060", # long branch + "BELGIUM/S0576/2023_OR_CEC5B896", # long branch + "BELGIUM/S0220/2023_OR_0B549607", # long branch + "BELGIUM/H0592/2022_OR_22BD830C", # long branch + # 2024-01-30 + "SPAIN/GA-HULA-615019/2023_B7077195", + "BELGIUM/S03941/2023_F4250630", + "NEW_YORK/392/2004_CFB2F75C", + "SPAIN/GA-HULA-615037/2023_E8183B9E", + "CACERES/59816379/2023_4D9F91F8", + "TURKEY/GIHSN_ANKARA-140111_950/2023_E0D8B88D", + # 2024-02-11 XXXX Indonesia is long branch + "KENYA/AFI-KDH-1820/2023_B3509344", + "INDONESIA/BIOKES-IDSW0063/2023_OR_5962326E", + "KENYA/AFI-ISB-1107/2023_E5F19B22", + "KENYA/AFI-MNB-1100/2023_6D42271", + # 2024-0703 + "CHIANG_RAI/P2287/2024_OR_2744AA9C", # 3 deletions + # 2024-0727 + "SASKATCHEWAN/RV03900/2024_OR_029CB62D", # 198 diffs + "WASHINGTON/S25532/2024_OR_13A107A6", # XXXX + "OREGON/S25529/2024_OR_5502D106", # XXXX + "OREGON/S25537/2023_OR_BE6502FF", # XXXX + "CAMBODIA/CCH230307/2023_7CC43803", # XXXX + "CAMBODIA/IAHC230253/2023_50A155E0", # XXXX + # 2024-08-06 + "VIRGINIA/H20-355-5372/2024_OR_53F1E92E", # XXXXX + "PANAMA/M273491/2024_OR_27E79A9B" # XXXXX + # 2024-0830 many changes + "COLORADO/150/2024_OR_2BD9D59A", + "COLORADO/150/2024_SIAT1_14B3FB98", + "MICHIGAN/113/2024_SIAT1_B0E6BEF3", + # 2024-0902 + "SASKATCHEWAN/RV03900/2024_OR_029CB62D", # long branch + "PANAMA/M273491/2024_OR_27E79A9B", # XXXX + # 2024-1004 + "CAMBODIA/NPH240499/2024_OR_F38419C8", # XXXX + # 2024-1025 + "CAMBODIA/NPH240575/2024_OR_33D3499A", + # 2024-1115 + "PARANA/44706/2021_OR_25BFEB40", + "SANTA_HELENA/PARANA_44706/2021_OR_25BFEB40", + # 2024-0111 (tree built 2024-1811) + "GREATER_ACCRA_REGION/FS-24-4832/2024_B7004939", + "GREATER_ACCRA_REGION/FS-24-4779/2024_1393BD1C", + # 2024-2611 + "QUEENSLAND/IN000643/2024_1ACA7E5A", + "QUEENSLAND/IN000245/2024_0854A342", + "WASHINGTON/S26082/2022_OR_7462BFDE", + "WASHINGTON/S26079/2022_OR_07B0CEEC", + "WASHINGTON/S26107/2023_OR_A56901B3", + "WASHINGTON/S26104/2022_OR_E80CAD50", + "WASHINGTON/S26097/2022_OR_40AE3B8C", + "WASHINGTON/S26077/2022_OR_C294BE90", + "WASHINGTON/S26098/2022_OR_1EDB1957", + "WASHINGTON/S26088/2022_OR_C6B3FAA9", + # 2024-0912 + "GALICIA/28220935/2023_C495745E", + "GALICIA/33341960/2023_0FF7D1C8", + # 2025-2401 + "TURKMENISTAN/784/2025_OR_F898A707", + # 2025-1202 + "COTE_DIVOIRE/4331/2024_OR_E0FF092D", + # 2025-1202 + "WISCONSIN/67/2005_XXMDCK2_8C52B46C", + # 2025-0716 (omitted exclusions in error, added 29/07 PR) + "INDIA/BHO_NIV25-827/2025_MDCK3_C54368E2", + "CAMBODIA/IAHC240402/2024_OR_35F1131A", + "INDIA/PUN_NIVCOV2572/2025_MDCK1_C9AF818D", + "ASTURIAS/232524600/2025_A4329A8F", + 
"OREL/SWINE-RII-WD2652S/2025_OR_57A8B37C", + "OREL/SWINE-RII-WD2687S/2025_OR_C384E32B", + "MEXICO/2496813201/2025_OR_E2853515", + # 2025-0723 + "NORWAY/6870/2025_OR_828769BE", + "BRITISH_COLUMBIA/RV00479/2025_SIAT2/SIAT1_0DA227AA", + "QUEBEC/240/2025_OR_8D4D59A8", + "GALICIA/52520233/2025_F71B58B8", + "BULGARIA/2506/2025_SIAT2/SIAT1_8B0BEF2D", + "BULGARIA/1941/2025_SIAT2/SIAT1_0D5E6F90", + "CAMBODIA/IBTB250007/2025_OR_4E893E81", + "ASTURIAS/232525925/2025_719B9A6C", + "BAVARIA/6511057/2025_OR_D759C9B5", + "MEXICO/2496813201/2025_OR_E2853515", + "CAMBODIA/SVH250074/2025_OR_2253D125", + "ALBANIA/310521/2025_SIAT1_5DCA4F3D", + "CAMBODIA/IAHC250174/2025_OR_6BFB38A8", + "FRANCE/ARA-HCL023004739801/2023_OR_003FC67E", + "SOUTH_AFRICA/NHLS-UCT-CERI-C067966/2025_EE268891", + "PERU/LIM-UPCH-4915/2025_1E953D52", + # 2025-0729 no exclusions + # 2025-0818 + "INDIA/BHO-NIV25-1226/2025_MDCK1_2E99D391", + "INDIA/PUD-NIV25-1183/2025_MDCK4_1BD68829", + "INDIA/JAI-NIV25-1120/2025_MDCK1_9051692B", + "ALBANIA/310569/2025_SIAT1_A284C73A", + "SWINE/OHIO/A02858604/2025_OR_3F9C4EB8", + "SWINE/NORTH_CAROLINA/A02979483/2025_OR_9F18313D", + "SWINE/IOWA/A02858606/2025_OR_8367F95A", + "SWINE/IOWA/A02858602/2025_OR_6B07CDAC", + "SWINE/MINNESOTA/A02858601/2025_OR_8A30B201", + "SWINE/MINNESOTA/A02979412/2025_OR_1838EF26", + # 2025-0827 no exclusions + # 2025-0910 no exclusions + # 2025-0917 + "INDIA/BHO-NIV25-1226/2025_MDCK1_2E99D391", + "INDIA/PUD-NIV25-1183/2025_MDCK4_1BD68829", + "ALBANIA/310569/2025_SIAT1_A284C73A", + "INDIA/AS-RMRC-DIB-1189/2023_D09F02E4", + # 2025-1006 + "MEXICO/2578825701/2025_OR_B6857194", + "CAMPO_GRANDE/LEIAL6538/2025_OR_3083EF3A", + # 2025-1028 + "INDIA/DIB-NIV25-1297/2025_DEAC04B8", + "INDIA/PUN-NIV25-1199/2025_EA34056E", + "INDIA/DEL-NIV25-1587/2025_01FD20A7", + "INDIA/PUN-NIVCOV251684/2025_95A3CE8E", + "INDIA/PUD-NIV25-1176/2025_8B8D9E17", + "INDIA/LUK-NIV25-1336/2025_4C35C9DE", + "INDIA/DIB-NIV25-1309/2025_0D480D68", + "INDIA/LUK-NIV25-1344/2025_ED98BB3E", + "INDIA/PUD-NIV25-1341/2025_663D69FB", + "GALICIA/GA-CHUAC-478/2025_OR_AE004DEB", + # 2025-1112 + "ESPIRITO_SANTO/4988/2025_OR_C4DA8B91", + # 2025-1126 + "CAMBODIA/NPH250828/2025_OR_B75D636B", + "CAMBODIA/KVH250244/2025_OR_DB084CEF", + "CAMBODIA/ISVR250088/2025_OR_8696921D", + "CAMBODIA/KPH250212/2025_OR_0DF92A52", + "CAMBODIA/INPH250362/2025_OR_6FD76539", + "CAMBODIA/AHC250325/2025_OR_10F98EBA", + "CAMBODIA/NPH250817/2025_OR_DD4119F8", + "CAMBODIA/AHC250341/2025_OR_D6BA3933", + "CAMBODIA/IKPT250105/2025_OR_6BA23E98", + "KILIFI/MAT220593/2021_OR_E96D0DC0", + "MINAS_GERAIS/CEVI_2501233/2025_OR_ACDD91A4", + ] + ) .remove_hash_duplicates() .replace_with_master() .sort("+date") @@ -317,41 +846,61 @@ def h3(subtype: str, args: argparse.Namespace): report(subtype, selected, expected_nuc_size=1650, limit=0) export(subtype, selected) + # ---------------------------------------------------------------------- sSubtypeToFunc = {"BVIC": bvic, "BYAM": byam, "A(H1N1)": h1, "A(H3N2)": h3} # ---------------------------------------------------------------------- -def export(subtype: str, selected :ae_backend.seqdb.Selected): - fasta.write(filename_or_stream=subtype_dir(subtype).joinpath("source.fas"), selected=selected, aa=False) + +def export(subtype: str, selected: ae_backend.seqdb.Selected): + fasta.write( + filename_or_stream=subtype_dir(subtype).joinpath("source.fas"), + selected=selected, + aa=False, + ) + # ---------------------------------------------------------------------- + def subtype_dir(subtype: str): dir = Path(sSubdirforSubtype[subtype]) 
    dir.mkdir(exist_ok=True)
    return dir

+
 # ----------------------------------------------------------------------

-def report(subtype: str, selected :ae_backend.seqdb.Selected, expected_nuc_size: int, limit: int = 20):
+
+def report(
+    subtype: str,
+    selected: ae_backend.seqdb.Selected,
+    expected_nuc_size: int,
+    limit: int = 20,
+):
     print(f"{subtype:7s} {len(selected)}")
     for seq in selected:
         if len(seq.nuc) != expected_nuc_size:
-            print(f">> wrong size {len(seq.nuc)} of {seq.seq_id()} (expected: {expected_nuc_size}) ")
+            print(
+                f">> wrong size {len(seq.nuc)} of {seq.seq_id()} (expected: {expected_nuc_size}) "
+            )
     for no, seq in enumerate(selected):
         if no >= limit:
             break
         # print(f"{no:4d} {seq.seq_id():40s} [{seq.date()}] {seq.aa}")
         print(f"{no:4d} {seq.seq_id():40s} [{seq.date()}]")

+
 # ======================================================================

 try:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("subtype", nargs="*", type=lambda src: src.upper())
-    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False)
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", action="store_true", default=False
+    )
     args = parser.parse_args()
     exit_code = main(args) or 0
 except Exception as err:
"PERTH/16/2009" + selected = ( + ae_backend.seqdb.for_subtype(sSubtypeForSeqdb[subtype]) + .select_all() + .human() + .exclude_with_issue() + .filter_dates(first="2018-03") + .find_masters() # to be able to use .filter below + .include_name([ + outgroup, + "VICTORIA/361/2011", # 2011-10-24 3C.3 + # [2020-09-07] Nearest common ancestor of 2a and 3a in /syn/eu/ac/results/eu/2019-0417-h3-trees/cdc-usa-2009-2019-250-per-year-1.rough-29532.tree.pdf https://notebooks.antigenic-cartography.org/eu/results/eu/2019-0417-h3-trees/ + "MARYLAND/30/2012", # 2012-07-26 3C.3 + # [2020-08-17] Derek thinks that the root (A/STOCKHOLM/6/2014) is not old enough + "TEXAS/50/2012", # 2012-04-15 3C.3 root in MELB tree 2020 + "VERMONT/6/2012", + "STOCKHOLM/6/2014", # 2014-02-06 3a + # [2020-02-07] intermediate strains from nextflu https://nextstrain.org/flu/seasonal/h3n2/ha/2y to keep 3a at the top + "SWITZERLAND/9715293/2013", # 2013-12-06 3a + "NORWAY/466/2014", # 2014-02-03 3a + "SOUTH_AUSTRALIA/55/2014", # 2014-06-29 3a + "TASMANIA/11/2014", # 2014-03-16 3a + "KOBE/63/2014", # 2014-05-21 3a + "PERU/27/2015", # 2015-04-13 3a + "NEVADA/22/2016", # 2016-03-05 3a + "IDAHO/33/2016", # 2016-06-08 3a + "TEXAS/88/2016", # 2016-02-25 3a + "TEXAS/71/2017", # 2017-03-18 3a + "BRAZIL/7331/2018", # 2018-07-09 3a + "KANSAS/14/2017", # 2017-12-14 3a, to have serum circles in the sig page + "HONG_KONG/4801/2014", # 2014-02-26 2a + "HAWAII/47/2014", # 2014-07-18 2a + "NORTH_CAROLINA/4/2017", # 2017-01-26 2a2 + "NORTH_CAROLINA/4/2016", # 2016-01-14 2a1 + "ANTANANARIVO/1067/2016", # 2016-04-06 2a1 + "HONG_KONG/2286/2017", # 2017-05-23 2a1b 135K + "WISCONSIN/327/2017", # 2017-09-22 2a1b 135K + "ABU_DHABI/240/2018", # 2018-01-01 2a1b 135K + "JAMAICA/1447/2018", # 2018-02-19 2a1b 131K + + # Strains before and after T135N substitution to have a better 135N branch placement + # Sarah 2021-02-08 17:05 + "WISCONSIN/85/2016", + "SRI_LANKA/56/2017", + "SOUTH_CAROLINA/4/2017", + "YOKOHAMA/145/2017", + "INDIA/9930/2017", + "HONG_KONG/3118/2017", + "HAWAII/47/2014", + "NIIGATA-C/43/2015", + "DAKAR/17/2016", + # "CAMEROON/16V-9267/2016", # excluded, truncated sequence + "LAOS/3008/2016", + "YUNNAN_LINXIANG/1718/2016", + "HONG_KONG/2302/2016", + "ONTARIO/RV2414/2015", + "ONTARIO/RV2414/2015", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + "HONG_KONG/2286/2017", + # XXXXX 2023-0811 + "BAHIA/292038965/2021_70230607", + "BAHIA/292104308/2022_D4526D62", + # XXXXX 2023-0829 + "SOUTH_AFRICA/PET28428/2023_7572E223", + "SOUTH_AFRICA/PET28193/2023_BF46BBB4", + "NORWAY/8084/2023_OR_4548A267", + ]) + .exclude_name([ + "SWINE/", + # long branch (2021-0622) + "WISCONSIN/1/2021", + # long branch (2021-1117) + "HAWAII/28/2020", + "SOUTH_AUSTRALIA/85/2018", + "SOUTH_AUSTRALIA/1/2021", + "MANITOBA/3/2021", + "GUANGXI_GANGBEI/190/2019", + # 2021-1217 potential long branch + "OHIO/6/2021_ISOLATE", + # 2022-0204 potential long branch + "FRANCE/ARA-HCL021219268801/2021", + "FRANCE/ARA-HCL021220304101/2021", + # 2022-0209 long branch, eight X, R in nuc + "ONTARIO/RV2002/2019_P1_0F1E8F1E", + # 2022-0415 potential long branch + "ACRE/181823-IEC/2021", + # 2022-0819 potential long branch + "CATALONIA/NSVH101883267/2022_OR_F79C7813", + # 2022-0825 long branch in the tree + "INDIANA/27/2018_SIAT1_26C3A2A4", + "SWINE/KENTUCKY/18TOSU1939/2018_26C3A2A4", + "SWINE/ILLINOIS/18TOSU1411/2018_26C3A2A4", + # 2022-0826 potential long branch + "FIJI/99/2022_OR_80B95BF8", + "FIJI/102/2022_OR_6AEB79D8", + 
"WEST_VIRGINIA/25/2022_SIAT1_3E592413", + "WEST_VIRGINIA/26/2022_SIAT1_3E592413", + "WEST_VIRGINIA/24/2022_SIAT1_2FDA4DA3", + # 2022-0902 potential long branch + "FRANCE/ARA-HCL022060609601/2022_65F4520B", + # 2022-10-07 potential long branch + "CATALONIA/NSVH101908168/2022_OR_D5C0176E", + # 2022-10-28 potential long branch + "MICHIGAN/48/2022_OR_910196F9", + # 2022-11-04 potential long branch + "NEW_MEXICO/19/2022_IRX_4BA11056", + # 2022-11-11 potential long branches + "CATALONIA/NSVH534131326/2022_OR_4AC47F3F", + "A/ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_MIXED_ECE1_DCC5FAD7", + "ANAS_PLATYRHYNCHOS/BELGIUM/2499_0006/2021_ECE1_DCC5FAD7", + # 2022-11-26 potential long branches + "CATALONIA/NSVH101954411/2022_OR_7A633967", + "CATALONIA/NSVH510598655/2022_OR_57BCC58B", + # 2022-12-02 potential long branches + "MISSOURI/19/2022_OR_837C934E", + # 2022-12-10 potential long branches + "CATALONIA/NSVH560028269/2022_OR_9C189CFA", # 159X 160X 162X 163X 164X + "ATHENS/355/2022_D9D4A86A", # 38X 40X 41X 42X 59X 67X 81X 128X + "MICHIGAN/48/2022_OR_910196F9", + "MICHIGAN/UOM10047284024/2022_OR_910196F9", + # 2023-01-08 potential long branches + "BRITISH_COLUMBIA/PHL-480/2022_OR_EAAE7C01", + # 2023-01-13 potential long branch + "GALICIA/22034482/2022_2EE89148", + "GALICIA/22018524/2022_1EE46082", + "GALICIA/22033804/2022_0EE699D6", + "GALICIA/21987208/2022_F49E5748", + "GALICIA/22036079/2022_430E996A", + "GALICIA/22018743/2022_0116B017", + "GALICIA/22011636/2022_3D9F40AD", + # 2023-01-20 potential long branch + "SINGAPORE/GP9117/2022_OR_69761A51", + "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", + # 2023-01-28 potential long branches + "MICHIGAN/48/2022_OR_910196F9", + "MACEDONIA/IPH-MKD-59729/2021_OR_837D0C9D", + "KHON_KAEN/FS005/2022_OR_1A5637D8", + "TURKEY/GIHSN_ANKARA-14020_7/2022_012CD2F7", + "ATHENS/355/2022_D9D4A86", + "CATALONIA/NSVH101994299/2023_OR_13A43DA9", + "MACEDONIA/IPH-MKD-HD/2021_OR_B52D1807", + "SOUTH_AFRICA/R07603/2022_OR_7937FE46", + "SOUTH_AFRICA/R05772/2022_OR_552B5F5E", + "GALICIA/22034482/2022_2EE89148", + "CATALONIA/NSVH560028269/2022_OR_9C189CFA", + # 2023-02-04 potential long branches + "MANITOBA/RV00728/2022_P1_5236154F", + "ONTARIO/198/2022_OR_D1EC3B40", + # 2023-04-21 potential long branches + "ROMANIA/493/2023_612606B9", + "PAKISTAN/1140/2023_OR_3D2D111B", + "CATALONIA/NSVH102045605/2023_OR_7490FFA8", + # XXX & long branch 2023-0731 + "BAHIA/PVM105707/2021_OR_BCE07E46", + "SOUTH_AFRICA/K056301/2023_30E0E639", + # 2023-09-08 potential long branch ?why + "SOUTH_AFRICA/K056301/2023_CC1FE952" + ]) + .remove_hash_duplicates() + .replace_with_master() + .sort("+date") + .move_name_to_beginning(outgroup) + ) + report(subtype, selected, expected_nuc_size=1650, limit=0) + export(subtype, selected) + +# ---------------------------------------------------------------------- + +sSubtypeToFunc = {"BVIC": bvic, "BYAM": byam, "A(H1N1)": h1, "A(H3N2)": h3} + +# ---------------------------------------------------------------------- + +def export(subtype: str, selected :ae_backend.seqdb.Selected): + fasta.write(filename_or_stream=subtype_dir(subtype).joinpath("source.fas"), selected=selected, aa=False) + +# ---------------------------------------------------------------------- + +def subtype_dir(subtype: str): + dir = Path(sSubdirforSubtype[subtype]) + dir.mkdir(exist_ok=True) + return dir + +# ---------------------------------------------------------------------- + +def report(subtype: str, selected :ae_backend.seqdb.Selected, expected_nuc_size: int, limit: int = 20): + 
print(f"{subtype:7s} {len(selected)}") + for seq in selected: + if len(seq.nuc) != expected_nuc_size: + print(f">> wrong size {len(seq.nuc)} of {seq.seq_id()} (expected: {expected_nuc_size}) ") + for no, seq in enumerate(selected): + if no >= limit: + break + # print(f"{no:4d} {seq.seq_id():40s} [{seq.date()}] {seq.aa}") + print(f"{no:4d} {seq.seq_id():40s} [{seq.date()}]") + +# ====================================================================== + +try: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("subtype", nargs="*", type=lambda src: src.upper()) + parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", default=False) + args = parser.parse_args() + exit_code = main(args) or 0 +except Exception as err: + print(f"> {err}\n{traceback.format_exc()}", file=sys.stderr) + exit_code = 1 +exit(exit_code) + +# ====================================================================== diff --git a/proj/weekly-tree/garli-h3 b/proj/weekly-tree/garli-h3 new file mode 100755 index 0000000..49c0e63 --- /dev/null +++ b/proj/weekly-tree/garli-h3 @@ -0,0 +1,452 @@ +#! /usr/bin/env python3 + +import sys, os, re, argparse, logging, time, datetime, concurrent.futures, subprocess, random, traceback +from pathlib import Path + +# ====================================================================== + +sSubtypeNorm = {"BV": "BVIC", "BVIC": "BVIC", "BY": "BYAM", "BYAM": "BYAM", "A(H1N1)": "A(H1N1)", "H1": "A(H1N1)", "1": "A(H1N1)", "A(H3N2)": "A(H3N2)", "H3": "A(H3N2)", "3": "A(H3N2)"} +sSubdirforSubtype = {"BVIC": "bvic", "BYAM": "byam", "A(H1N1)": "h1", "A(H3N2)": "h3"} + +sRandomGen = random.SystemRandom() + +# ---------------------------------------------------------------------- + +def main(args: argparse.Namespace): + argv = '"' + '" "'.join(sys.argv) + '"' + working_dir = Path(args.subdir).resolve() + if args.start_tree: + tree = args.start_tree.resolve() + else: + tree = Path(working_dir.joinpath("raxml-best.txt").open().read().strip()).resolve() + if args.source_fasta: + source_fasta = args.source_fasta.resolve() + else: + source_fasta = working_dir.joinpath("source.fas").resolve() + try: + best_tree_file = garli(working_dir=working_dir, tree=tree, source_fas=source_fasta, num_runs=args.num_runs, stoptime=args.stoptime, slurm_nodelist=args.slurm_nodelist, slurm_exclude_nodes=args.slurm_exclude_nodes, new_output=args.new_output) + subprocess.run(["/usr/bin/mail", "-s", f"completed {sys.argv[0]} {working_dir}", "weekly-tree@antigenic-cartography.org"], input=f"cd {working_dir}\n{argv}\n{best_tree_file}\n", text=True) + return 0 + except Exception as err: + subprocess.run(["/usr/bin/mail", "-s", f"FAILED {sys.argv[0]} {working_dir}", "weekly-tree@antigenic-cartography.org"], input=f"cd {working_dir}\n{argv}\n\n{traceback.format_exc()}\n", text=True) + raise + +# ---------------------------------------------------------------------- + +def garli(working_dir: Path, tree: Path, source_fas: Path, num_runs: int, stoptime: int, slurm_nodelist: str, slurm_exclude_nodes: str, new_output: bool): + + default_required_memory = 15000 + + # the number of attachment branches evaluated for each taxon to be added to the tree during the creation of an ML stepwise-addition starting tree (when garli is run without raxml step) + attachmentspertaxon = 1000000 + # termination condition: when no new significantly better scoring topology has been encountered in greater than this number of generations, garli stops + genthreshfortopoterm = 20000 + + output_dir = 
get_output_dir(working_dir, "garli.", new_output=new_output) + + garli_cmd = "/syn/bin/garli" + srun_cmd = [ + "srun", + "--cpus-per-task=2", + # "--nodes=1", + "--ntasks=1", + "--threads=1", + f"--nodelist={slurm_nodelist}", + ] + if slurm_exclude_nodes: + srun_cmd.append(f"--exclude={slurm_exclude_nodes}") + + # ---------------------------------------------------------------------- + + def make_conf(run_id, required_memory): + """Returns filename of the written conf file""" + # if not outgroup or not isinstance(outgroup, list) or not all(isinstance(e, int) and e > 0 for e in outgroup): + # raise ValueError("outgroup must be non-empty list of taxa indices in the fasta file starting with 1") + garli_args = { + "source": str(source_fas.resolve()), + "availablememory": required_memory or default_required_memory, + "streefname": str(tree.resolve()), + "output_prefix": str(output_dir.joinpath(run_id)), + "attachmentspertaxon": attachmentspertaxon, + "randseed": sRandomGen.randint(1, 0xFFFFFFF), + "genthreshfortopoterm": genthreshfortopoterm, + "searchreps": 1, + "stoptime": stoptime, + "outgroup": "1" # " ".join(str(e) for e in outgroup), + } + conf = GARLI_CONF.format(**garli_args) + conf_filename = output_dir.joinpath(run_id + ".garli.conf") + with conf_filename.open("w") as f: + f.write(conf) + return conf_filename + + # ---------------------------------------------------------------------- + + def find_memory_requirements(): + return default_required_memory + +# sGreat = re.compile(r"\s*great\s+>=\s+(\d+)\s+MB", re.I) +# conf_file = make_conf(run_id="find_memory_requirements", required_memory=None) +# start = time.time() +# proc = subprocess.Popen([garli_cmd, str(conf_file)], stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) +# required_memory = None # default, in case the code below fails or times out +# timeout = 300 +# while required_memory is None: +# line = proc.stdout.readline().decode("utf-8") +# if mm := sGreat.match(line): +# required_memory = int(mm.group(1)) +# elif (time.time() - start) > timeout: +# required_memory = default_required_memory +# print(f"WARNING: Cannot obtain memory required by garli in {timeout} seconds: default for {working_dir.name}: {required_memory} will be used", file=sys.stderr) +# proc.kill() +# return required_memory + + # ---------------------------------------------------------------------- + + def run_garli(run_id, required_memory): + conf_file = make_conf(run_id=run_id, required_memory=required_memory) + # --mem-per-cpu is half of required_memory because we allocate whole 2 cpus per run to prevent from hyperthreading + cmd = (srun_cmd + [f"--mem-per-cpu={int(required_memory / 2) + 1}", f"--job-name='garli {working_dir.name} {run_id}'", f"--output={output_dir.joinpath(run_id).with_suffix('.stdout')}", f"--error={output_dir.joinpath(run_id).with_suffix('.stderr')}"] + + [garli_cmd, str(conf_file)]) + with output_dir.joinpath("commands.txt").open("a") as commands_txt: + print(" ".join(cmd), file=commands_txt) + # print(cmd) + subprocess.check_call(cmd) + + # ---------------------------------------------------------------------- + + def submit(): + required_memory = find_memory_requirements() + failures = 0 + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_runid = {executor.submit(run_garli, run_id, required_memory): run_id for run_id in (f"{ri:03d}" for ri in range(num_runs))} + for future in concurrent.futures.as_completed(future_to_runid): + try: + future.result() + except Exception as err: + 
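# (comment added) a failed replicate is logged rather than aborting the batch: results_available() treats the garli step as complete once more than half of the runs produced a *.best.phy +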
logging.error(f"garli {working_dir.name} {future_to_runid[future]} FAILED: {err}") + failures += 1 + else: + logging.info(f"garli run {working_dir.name} {future_to_runid[future]} completed") + if failures: + logging.error(f"garli {working_dir.name} FAILED: {failures} runs failed") + else: + logging.info("garli completed") + + # ---------------------------------------------------------------------- + + def results_available(): + return len(list(output_dir.glob("*.best.phy"))) > (num_runs / 2) + + # ---------------------------------------------------------------------- + + sReFinalScore = re.compile(r"Final\t-(\d+\.\d+)") + + def find_best_result(): + # scores in results are positive numbers + results = [[log_file_name, float(sReFinalScore.search(log_file_name.open().read()).group(1))] for log_file_name in output_dir.glob("*.log00.log")] + if not results: + raise RuntimeError(f"no results found in {output_dir}/*.log00.log") + results.sort(key=lambda en: en[1]) + output_dir.parent.joinpath("garli-best.txt").open("w").write(str(results[0][0]).replace(".log00.log", ".best.phy")) + output_dir.parent.joinpath("garli-all.txt").open("w").write("\n".join(f"{score:13.6f} {str(log_name).replace('.log00.log', '.best.phy')}" for log_name, score in results)) + return [Path(str(results[0][0]).replace(".log00.log", ".best.phy")), results[0][1]] + + # ---------------------------------------------------------------------- + + if not results_available(): + start = datetime.datetime.now() + submit() + logging.info(f"[{working_dir}]: garli elapsed: {datetime.datetime.now() - start}") + best_tree_file, best_score = find_best_result() + logging.info(f"[{working_dir}]: garli best: {best_tree_file} score: {best_score}") + return best_tree_file + +# ---------------------------------------------------------------------- + +GARLI_CONF = """\ +[general] +datafname = {source} +streefname = {streefname} +# Prefix of all output filenames, such as log, treelog, etc. Change +# this for each run that you do or the program will overwrite previous +# results. +ofprefix = {output_prefix} + +constraintfile = none + +# (1 to infinity) The number of attachment branches evaluated for each +# taxon to be added to the tree during the creation of an ML +# stepwise-addition starting tree. Briefly, stepwise addition is an +# algorithm used to make a tree, and involves adding taxa in a random +# order to a growing tree. For each taxon to be added, a number of +# randomly chosen attachment branches are tried and scored, and then +# the best scoring one is chosen as the location of that taxon. The +# attachmentspertaxon setting controls how many attachment points are +# evaluated for each taxon to be added. A value of one is equivalent +# to a completely random tree (only one randomly chosen location is +# evaluated). A value of greater than 2 times the number of taxa in +# the dataset means that all attachment points will be evaluated for +# each taxon, and will result in very good starting trees (but may +# take a while on large datasets). Even fairly small values (< 10) can +# result in starting trees that are much, much better than random, but +# still fairly different from one another. Default: 50. 
+attachmentspertaxon = {attachmentspertaxon} + +# -1 - random seed chosen automatically +randseed = {randseed} + +# in Mb +availablememory = {availablememory} + +# The frequency with which the best score is written to the log file, default: 10 +logevery = 1000 + +# Whether to write three files to disk containing all information +# about the current state of the population every saveevery +# generations, with each successive checkpoint overwriting the +# previous one. These files can be used to restart a run at the last +# written checkpoint by setting the restart configuration entry. Default: 0 +writecheckpoints = 0 + +# Whether to restart at a previously saved checkpoint. To use this +# option the writecheckpoints option must have been used during a +# previous run. The program will look for checkpoint files that are +# named based on the ofprefix of the previous run. If you intend to +# restart a run, NOTHING should be changed in the config file except +# setting restart to 1. A run that is restarted from checkpoint will +# give exactly the same results it would have if the run had gone to +# completion. Default: 0 +restart = 0 + +# If writecheckpoints or outputcurrentbesttopology are specified, this +# is the frequency (in generations) at which checkpoints or the +# current best tree are written to file. default: 100 +saveevery = 1000 + +# Specifies whether some initial rough optimization is performed on +# the starting branch lengths and alpha parameter. This is always +# recommended. Default: 1. +refinestart = 1 + +# If true, the current best tree of the current search replicate is +# written to .best.current.tre every saveevery +# generations. In versions before 0.96 the current best topology was +# always written to file, but that is no longer the case. Seeing the +# current best tree has no real use apart from satisfying your +# curiosity about how a run is going. Default: 0. +outputcurrentbesttopology = 0 + +# If true, each new topology encountered with a better score than the +# previous best is written to file. In some cases this can result in +# really big files (hundreds of MB) though, especially for random +# starting topologies on large datasets. Note that this file is +# interesting to get an idea of how the topology changed as the +# searches progressed, but the collection of trees should NOT be +# interpreted in any meaningful way. This option is not available +# while bootstrapping. Default: 0. +outputeachbettertopology = 0 + +# Specifies whether the automatic termination conditions will be +# used. The conditions specified by both of the following two +# parameters must be met. See the following two parameters for their +# definitions. If this is false, the run will continue until it +# reaches the time (stoptime) or generation (stopgen) limit. It is +# highly recommended that this option be used! Default: 1. +enforcetermconditions = 1 + +# This specifies the first part of the termination condition. When no +# new significantly better scoring topology (see significanttopochange +# below) has been encountered in greater than this number of +# generations, this condition is met. Increasing this parameter may +# improve the lnL scores obtained (especially on large datasets), but +# will also increase runtimes. Default: 20000. +genthreshfortopoterm = {genthreshfortopoterm} + +# The second part of the termination condition. 
When the total +# improvement in score over the last intervallength x intervalstostore +# generations (default is 500 generations, see below) is less than +# this value, this condition is met. This does not usually need to be +# changed. Default: 0.05 +scorethreshforterm = 0.05 + +# The lnL increase required for a new topology to be considered +# significant as far as the termination condition is concerned. It +# probably doesn’t need to be played with, but you might try +# increasing it slightly if your runs reach a stable score and then +# take a very long time to terminate due to very minor changes in +# topology. Default: 0.01 +significanttopochange = 0.01 + +# Whether phylip formatted tree files will be output in addition to +# the default nexus files for the best tree across all replicates +# (.best.phy), the best tree for each replicate +# (.best.all.phy) or in the case of bootstrapping, the best +# tree for each bootstrap replicate (). +# We use .phy (it's newick tree format), use 1 here! +outputphyliptree = 1 + +# Whether to output three files of little general interest: the +# “fate”, “problog” and “swaplog” files. The fate file shows the +# parentage, mutation types and scores of every individual in the +# population during the entire search. The problog shows how the +# proportions of the different mutation types changed over the course +# of the run. The swaplog shows the number of unique swaps and the +# number of total swaps on the current best tree over the course of +# the run. Default: 0 +outputmostlyuselessfiles = 0 + +# This option allows for orienting the tree topologies in a consistent +# way when they are written to file. Note that this has NO effect +# whatsoever on the actual inference and the specified outgroup is NOT +# constrained to be present in the inferred trees. If multiple +# outgroup taxa are specified and they do not form a monophyletic +# group, this setting will be ignored. If you specify a single +# outgroup taxon it will always be present, and the tree will always +# be consistently oriented. To specify an outgroup consisting of taxa +# 1, 3 and 5 the format is this: outgroup = 1 3 5 +outgroup = {outgroup} + +resampleproportion = 1.0 +inferinternalstateprobs = 0 +outputsitelikelihoods = 0 +optimizeinputonly = 0 +collapsebranches = 1 + +# The number of independent search replicates to perform during a +# program execution. You should always either do multiple search +# replicates or multiple program executions with any dataset to get a +# feel for whether you are getting consistent results, which suggests +# that the program is doing a decent job of searching. Note that if +# this is > 1 and you are performing a bootstrap analysis, this is the +# number of search replicates to be done per bootstrap replicate. That +# can increase the chance of finding the best tree per bootstrap +# replicate, but will also increase bootstrap runtimes +# enormously. Default: 2 +searchreps = {searchreps} + +bootstrapreps = 0 + +# ------------------- FOR NUCLEOTIDES -------------- +datatype=nucleotide + +# The number of relative substitution rate parameters (note that the +# number of free parameters is this value minus one). Equivalent to +# the “nst” setting in PAUP* and MrBayes. 1rate assumes that +# substitutions between all pairs of nucleotides occur at the same +# rate, 2rate allows different rates for transitions and +# transversions, and 6rate allows a different rate between each +# nucleotide pair. These rates are estimated unless the fixed option +# is chosen.
New in version 0.96, parameters for any submodel of the +# GTR model may now be estimated. The format for specifying this is +# very similar to that used in the “rclass” setting of PAUP*. Within +# parentheses, six letters are specified, with spaces between +# them. The six letters represent the rates of substitution between +# the six pairs of nucleotides, with the order being A-C, A-G, A-T, +# C-G, C-T and G-T. Letters within the parentheses that are the same +# mean that a single parameter is shared by multiple nucleotide +# pairs. For example, ratematrix = (a b a a b a) would specify the HKY +# 2-rate model (equivalent to ratematrix = 2rate). This entry, +# ratematrix = (a b c c b a) would specify 3 estimated rates of +# substitution, with one rate shared by A-C and G-T substitutions, +# another rate shared by A-G and C-T substitutions, and the final rate +# shared by A-T and C-G substitutions. Default: 6rate +ratematrix = 6rate + +# (equal, empirical, estimate, fixed) Specifies how the equilibrium +# state frequencies (A, C, G and T) are treated. The empirical setting +# fixes the frequencies at their observed proportions, and the other +# options should be self-explanatory. Default: estimate +statefrequencies = estimate + +# (none, gamma, gammafixed) – The model of rate heterogeneity +# assumed. “gammafixed” requires that the alpha shape parameter is +# provided, and a setting of “gamma” estimates it. Default: gamma +ratehetmodel = gamma + +# (1 to 20) – The number of categories of variable rates (not +# including the invariant site class if it is being used). Must be set +# to 1 if ratehetmodel is set to none. Note that runtimes and memory +# usage scale linearly with this setting. Default: 4 +numratecats = 4 + +# (none, estimate, fixed) Specifies whether a parameter representing +# the proportion of sites that are unable to change (i.e. have a +# substitution rate of zero) will be included. This is typically +# referred to as “invariant sites”, but would better be termed +# “invariable sites”. Default: estimate +invariantsites = estimate + +# ---------------------------------------------------------------------- + +[master] +nindivs = 4 +holdover = 1 +selectionintensity = .5 +holdoverpenalty = 0 + +# The maximum number of generations to run. Note that this supersedes +# the automated stopping criterion (see enforcetermconditions above), +# and should therefore be set to a very large value if automatic +# termination is desired. +stopgen = 1000000 + +# The maximum number of seconds for the run to continue. Note that +# this supersedes the automated stopping criterion (see +# enforcetermconditions above), and should therefore be set to a very +# large value if automatic termination is desired.
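+# [note added: stoptime is filled in from the wrapper script's --stoptime option, which defaults to 3600 seconds; see the argparse setup at the bottom of this file]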
+stoptime = {stoptime} + +startoptprec = 0.5 +minoptprec = 0.01 +numberofprecreductions = 20 +treerejectionthreshold = 50.0 +topoweight = 1.0 +modweight = 0.05 +brlenweight = 0.2 +randnniweight = 0.1 +randsprweight = 0.3 +limsprweight = 0.6 +intervallength = 100 +intervalstostore = 5 + +limsprrange = 6 +meanbrlenmuts = 5 +gammashapebrlen = 1000 +gammashapemodel = 1000 +uniqueswapbias = 0.1 +distanceswapbias = 1.0 +""" + +# ---------------------------------------------------------------------- + +def get_output_dir(working_dir: Path, prefix: str, new_output: bool): + if not new_output and (output_dirs := list(working_dir.glob(prefix + "*"))): + output_dir = max(output_dirs) + else: + output_dir = working_dir.joinpath(prefix + datetime.datetime.now().strftime('%Y-%m%d-%H%M%S')) + output_dir.mkdir() + output_dir.chmod(0o777) + return output_dir + +# ====================================================================== + +try: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("subdir") + parser.add_argument("-n", "--num-runs", type=int, default=16) + parser.add_argument("-s", "--stoptime", default=60*60, type=int, help="termination condition: the maximum number of seconds for the run to continue") + parser.add_argument("-t", "--start-tree", default=None, type=Path) + parser.add_argument("-f", "--source-fasta", default=None, type=Path) + parser.add_argument("--new-output", action="store_true", default=False) + parser.add_argument("--node", dest="slurm_nodelist", default="i22", help="one node to run on, to run on multiple nodes use --exclude-nodes") + parser.add_argument("--exclude-nodes", dest="slurm_exclude_nodes", default=None) + args = parser.parse_args() + logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(asctime)s: %(message)s") + exit_code = main(args) or 0 +except Exception as err: + logging.error(f"> {err}\n{traceback.format_exc()}") + exit_code = 1 +exit(exit_code) + +# ====================================================================== diff --git a/proj/weekly-tree/garli-memvar b/proj/weekly-tree/garli-memvar new file mode 100755 index 0000000..35dd4ee --- /dev/null +++ b/proj/weekly-tree/garli-memvar @@ -0,0 +1,452 @@ +#! 
/usr/bin/env python3 + +import sys, os, re, argparse, logging, time, datetime, concurrent.futures, subprocess, random, traceback +from pathlib import Path + +# ====================================================================== + +sSubtypeNorm = {"BV": "BVIC", "BVIC": "BVIC", "BY": "BYAM", "BYAM": "BYAM", "A(H1N1)": "A(H1N1)", "H1": "A(H1N1)", "1": "A(H1N1)", "A(H3N2)": "A(H3N2)", "H3": "A(H3N2)", "3": "A(H3N2)"} +sSubdirforSubtype = {"BVIC": "bvic", "BYAM": "byam", "A(H1N1)": "h1", "A(H3N2)": "h3"} + +sRandomGen = random.SystemRandom() + +# ---------------------------------------------------------------------- + +def main(args: argparse.Namespace): + argv = '"' + '" "'.join(sys.argv) + '"' + working_dir = Path(args.subdir).resolve() + if args.start_tree: + tree = args.start_tree.resolve() + else: + tree = Path(working_dir.joinpath("raxml-best.txt").open().read().strip()).resolve() + if args.source_fasta: + source_fasta = args.source_fasta.resolve() + else: + source_fasta = working_dir.joinpath("source.fas").resolve() + try: + best_tree_file = garli(working_dir=working_dir, tree=tree, source_fas=source_fasta, num_runs=args.num_runs, stoptime=args.stoptime, slurm_nodelist=args.slurm_nodelist, slurm_exclude_nodes=args.slurm_exclude_nodes, new_output=args.new_output) + subprocess.run(["/usr/bin/mail", "-s", f"completed {sys.argv[0]} {working_dir}", "weekly-tree@antigenic-cartography.org"], input=f"cd {working_dir}\n{argv}\n{best_tree_file}\n", text=True) + return 0 + except Exception as err: + subprocess.run(["/usr/bin/mail", "-s", f"FAILED {sys.argv[0]} {working_dir}", "weekly-tree@antigenic-cartography.org"], input=f"cd {working_dir}\n{argv}\n\n{traceback.format_exc()}\n", text=True) + raise + +# ---------------------------------------------------------------------- + +def garli(working_dir: Path, tree: Path, source_fas: Path, num_runs: int, stoptime: int, slurm_nodelist: str, slurm_exclude_nodes: str, new_output: bool): + + default_required_memory = 15800 + + # the number of attachment branches evaluated for each taxon to be added to the tree during the creation of an ML stepwise-addition starting tree (when garli is run without raxml step) + attachmentspertaxon = 1000000 + # termination condition: when no new significantly better scoring topology has been encountered in greater than this number of generations, garli stops + genthreshfortopoterm = 20000 + + output_dir = get_output_dir(working_dir, "garli.", new_output=new_output) + + garli_cmd = "/syn/bin/garli" + srun_cmd = [ + "srun", + "--cpus-per-task=2", + # "--nodes=1", + "--ntasks=1", + "--threads=1", + f"--nodelist={slurm_nodelist}", + ] + if slurm_exclude_nodes: + srun_cmd.append(f"--exclude={slurm_exclude_nodes}") + + # ---------------------------------------------------------------------- + + def make_conf(run_id, required_memory): + """Returns filename of the written conf file""" + # if not outgroup or not isinstance(outgroup, list) or not all(isinstance(e, int) and e > 0 for e in outgroup): + # raise ValueError("outgroup must be non-empty list of taxa indices in the fasta file starting with 1") + garli_args = { + "source": str(source_fas.resolve()), + "availablememory": required_memory or default_required_memory, + "streefname": str(tree.resolve()), + "output_prefix": str(output_dir.joinpath(run_id)), + "attachmentspertaxon": attachmentspertaxon, + "randseed": sRandomGen.randint(1, 0xFFFFFFF), + "genthreshfortopoterm": genthreshfortopoterm, + "searchreps": 1, + "stoptime": stoptime, + "outgroup": "1" # " ".join(str(e) for e 
in outgroup), + } + conf = GARLI_CONF.format(**garli_args) + conf_filename = output_dir.joinpath(run_id + ".garli.conf") + with conf_filename.open("w") as f: + f.write(conf) + return conf_filename + + # ---------------------------------------------------------------------- + + def find_memory_requirements(): + return default_required_memory + +# sGreat = re.compile(r"\s*great\s+>=\s+(\d+)\s+MB", re.I) +# conf_file = make_conf(run_id="find_memory_requirements", required_memory=None) +# start = time.time() +# proc = subprocess.Popen([garli_cmd, str(conf_file)], stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) +# required_memory = None # default, in case the code below fails or times out +# timeout = 300 +# while required_memory is None: +# line = proc.stdout.readline().decode("utf-8") +# if mm := sGreat.match(line): +# required_memory = int(mm.group(1)) +# elif (time.time() - start) > timeout: +# required_memory = default_required_memory +# print(f"WARNING: Cannot obtain memory required by garli in {timeout} seconds: default for {working_dir.name}: {required_memory} will be used", file=sys.stderr) +# proc.kill() +# return required_memory + + # ---------------------------------------------------------------------- + + def run_garli(run_id, required_memory): + conf_file = make_conf(run_id=run_id, required_memory=required_memory) + # --mem-per-cpu is half of required_memory because we allocate whole 2 cpus per run to prevent from hyperthreading + cmd = (srun_cmd + [f"--mem-per-cpu={int(required_memory / 2) + 1}", f"--job-name='garli {working_dir.name} {run_id}'", f"--output={output_dir.joinpath(run_id).with_suffix('.stdout')}", f"--error={output_dir.joinpath(run_id).with_suffix('.stderr')}"] + + [garli_cmd, str(conf_file)]) + with output_dir.joinpath("commands.txt").open("a") as commands_txt: + print(" ".join(cmd), file=commands_txt) + # print(cmd) + subprocess.check_call(cmd) + + # ---------------------------------------------------------------------- + + def submit(): + required_memory = find_memory_requirements() + failures = 0 + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_runid = {executor.submit(run_garli, run_id, required_memory): run_id for run_id in (f"{ri:03d}" for ri in range(num_runs))} + for future in concurrent.futures.as_completed(future_to_runid): + try: + future.result() + except Exception as err: + logging.error(f"garli {working_dir.name} {future_to_runid[future]} FAILED: {err}") + failures += 1 + else: + logging.info(f"garli run {working_dir.name} {future_to_runid[future]} completed") + if failures: + logging.error(f"garli {working_dir.name} FAILED: {failures} runs failed") + else: + logging.info("garli completed") + + # ---------------------------------------------------------------------- + + def results_available(): + return len(list(output_dir.glob("*.best.phy"))) > (num_runs / 2) + + # ---------------------------------------------------------------------- + + sReFinalScore = re.compile(r"Final\t-(\d+\.\d+)") + + def find_best_result(): + # scores in results are positive numbers + results = [[log_file_name, float(sReFinalScore.search(log_file_name.open().read()).group(1))] for log_file_name in output_dir.glob("*.log00.log")] + if not results: + raise RuntimeError(f"no results found in {output_dir}/*.log00.log") + results.sort(key=lambda en: en[1]) + output_dir.parent.joinpath("garli-best.txt").open("w").write(str(results[0][0]).replace(".log00.log", ".best.phy")) + 
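# (comment added) a smaller magnitude means a less negative lnL, so after the ascending sort results[0] is the best run; its .best.phy path goes to garli-best.txt above, and the full ranked list is written to garli-all.txt below +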
output_dir.parent.joinpath("garli-all.txt").open("w").write("\n".join(f"{score:13.6f} {str(log_name).replace('.log00.log', '.best.phy')}" for log_name, score in results)) + return [Path(str(results[0][0]).replace(".log00.log", ".best.phy")), results[0][1]] + + # ---------------------------------------------------------------------- + + if not results_available(): + start = datetime.datetime.now() + submit() + logging.info(f"[{working_dir}]: garli elapsed: {datetime.datetime.now() - start}") + best_tree_file, best_score = find_best_result() + logging.info(f"[{working_dir}]: garli best: {best_tree_file} score: {best_score}") + return best_tree_file + +# ---------------------------------------------------------------------- + +GARLI_CONF = """\ +[general] +datafname = {source} +streefname = {streefname} +# Prefix of all output filenames, such as log, treelog, etc. Change +# this for each run that you do or the program will overwrite previous +# results. +ofprefix = {output_prefix} + +constraintfile = none + +# (1 to infinity) The number of attachment branches evaluated for each +# taxon to be added to the tree during the creation of an ML +# stepwise-addition starting tree. Briefly, stepwise addition is an +# algorithm used to make a tree, and involves adding taxa in a random +# order to a growing tree. For each taxon to be added, a number of +# randomly chosen attachment branches are tried and scored, and then +# the best scoring one is chosen as the location of that taxon. The +# attachmentspertaxon setting controls how many attachment points are +# evaluated for each taxon to be added. A value of one is equivalent +# to a completely random tree (only one randomly chosen location is +# evaluated). A value of greater than 2 times the number of taxa in +# the dataset means that all attachment points will be evaluated for +# each taxon, and will result in very good starting trees (but may +# take a while on large datasets). Even fairly small values (< 10) can +# result in starting trees that are much, much better than random, but +# still fairly different from one another. Default: 50. +attachmentspertaxon = {attachmentspertaxon} + +# -1 - random seed chosen automatically +randseed = {randseed} + +# in Mb +availablememory = {availablememory} + +# The frequency with which the best score is written to the log file, default: 10 +logevery = 1000 + +# Whether to write three files to disk containing all information +# about the current state of the population every saveevery +# generations, with each successive checkpoint overwriting the +# previous one. These files can be used to restart a run at the last +# written checkpoint by setting the restart configuration entry. Default: 0 +writecheckpoints = 0 + +# Whether to restart at a previously saved checkpoint. To use this +# option the writecheckpoints option must have been used during a +# previous run. The program will look for checkpoint files that are +# named based on the ofprefix of the previous run. If you intend to +# restart a run, NOTHING should be changed in the config file except +# setting restart to 1. A run that is restarted from checkpoint will +# give exactly the same results it would have if the run had gone to +# completion. Default: 0 +restart = 0 + +# If writecheckpoints or outputcurrentbesttopology are specified, this +# is the frequency (in generations) at which checkpoints or the +# current best tree are written to file. 
default: 100 +saveevery = 1000 + +# Specifies whether some initial rough optimization is performed on +# the starting branch lengths and alpha parameter. This is always +# recommended. Default: 1. +refinestart = 1 + +# If true, the current best tree of the current search replicate is +# written to .best.current.tre every saveevery +# generations. In versions before 0.96 the current best topology was +# always written to file, but that is no longer the case. Seeing the +# current best tree has no real use apart from satisfying your +# curiosity about how a run is going. Default: 0. +outputcurrentbesttopology = 0 + +# If true, each new topology encountered with a better score than the +# previous best is written to file. In some cases this can result in +# really big files (hundreds of MB) though, especially for random +# starting topologies on large datasets. Note that this file is +# interesting to get an idea of how the topology changed as the +# searches progressed, but the collection of trees should NOT be +# interpreted in any meaningful way. This option is not available +# while bootstrapping. Default: 0. +outputeachbettertopology = 0 + +# Specifies whether the automatic termination conditions will be +# used. The conditions specified by both of the following two +# parameters must be met. See the following two parameters for their +# definitions. If this is false, the run will continue until it +# reaches the time (stoptime) or generation (stopgen) limit. It is +# highly recommended that this option be used! Default: 1. +enforcetermconditions = 1 + +# This specifies the first part of the termination condition. When no +# new significantly better scoring topology (see significanttopochange +# below) has been encountered in greater than this number of +# generations, this condition is met. Increasing this parameter may +# improve the lnL scores obtained (especially on large datasets), but +# will also increase runtimes. Default: 20000. +genthreshfortopoterm = {genthreshfortopoterm} + +# The second part of the termination condition. When the total +# improvement in score over the last intervallength x intervalstostore +# generations (default is 500 generations, see below) is less than +# this value, this condition is met. This does not usually need to be +# changed. Default: 0.05 +scorethreshforterm = 0.05 + +# The lnL increase required for a new topology to be considered +# significant as far as the termination condition is concerned. It +# probably doesn’t need to be played with, but you might try +# increasing it slightly if your runs reach a stable score and then +# take a very long time to terminate due to very minor changes in +# topology. Default: 0.01 +significanttopochange = 0.01 + +# Whether phylip formatted tree files will be output in addition to +# the default nexus files for the best tree across all replicates +# (.best.phy), the best tree for each replicate +# (.best.all.phy) or in the case of bootstrapping, the best +# tree for each bootstrap replicate (). +# We use .phy (it's newick tree format), use 1 here! +outputphyliptree = 1 + +# Whether to output three files of little general interest: the +# “fate”, “problog” and “swaplog” files. The fate file shows the +# parentage, mutation types and scores of every individual in the +# population during the entire search. The problog shows how the +# proportions of the different mutation types changed over the course +# of the run.
The swaplog shows the number of unique swaps and the +# number of total swaps on the current best tree over the course of +# the run. Default: 0 +outputmostlyuselessfiles = 0 + +# This option allows for orienting the tree topologies in a consistent +# way when they are written to file. Note that this has NO effect +# whatsoever on the actual inference and the specified outgroup is NOT +# constrained to be present in the inferred trees. If multiple +# outgroup taxa are specified and they do not form a monophyletic +# group, this setting will be ignored. If you specify a single +# outgroup taxon it will always be present, and the tree will always +# be consistently oriented. To specify an outgroup consisting of taxa +# 1, 3 and 5 the format is this: outgroup = 1 3 5 +outgroup = {outgroup} + +resampleproportion = 1.0 +inferinternalstateprobs = 0 +outputsitelikelihoods = 0 +optimizeinputonly = 0 +collapsebranches = 1 + +# The number of independent search replicates to perform during a +# program execution. You should always either do multiple search +# replicates or multiple program executions with any dataset to get a +# feel for whether you are getting consistent results, which suggests +# that the program is doing a decent job of searching. Note that if +# this is > 1 and you are performing a bootstrap analysis, this is the +# number of search replicates to be done per bootstrap replicate. That +# can increase the chance of finding the best tree per bootstrap +# replicate, but will also increase bootstrap runtimes +# enormously. Default: 2 +searchreps = {searchreps} + +bootstrapreps = 0 + +# ------------------- FOR NUCLEOTIDES -------------- +datatype=nucleotide + +# The number of relative substitution rate parameters (note that the +# number of free parameters is this value minus one). Equivalent to +# the “nst” setting in PAUP* and MrBayes. 1rate assumes that +# substitutions between all pairs of nucleotides occur at the same +# rate, 2rate allows different rates for transitions and +# transversions, and 6rate allows a different rate between each +# nucleotide pair. These rates are estimated unless the fixed option +# is chosen. New in version 0.96, parameters for any submodel of the +# GTR model may now be estimated. The format for specifying this is +# very similar to that used in the “rclass” setting of PAUP*. Within +# parentheses, six letters are specified, with spaces between +# them. The six letters represent the rates of substitution between +# the six pairs of nucleotides, with the order being A-C, A-G, A-T, +# C-G, C-T and G-T. Letters within the parentheses that are the same +# mean that a single parameter is shared by multiple nucleotide +# pairs. For example, ratematrix = (a b a a b a) would specify the HKY +# 2-rate model (equivalent to ratematrix = 2rate). This entry, +# ratematrix = (a b c c b a) would specify 3 estimated rates of +# substitution, with one rate shared by A-C and G-T substitutions, +# another rate shared by A-G and C-T substitutions, and the final rate +# shared by A-T and C-G substitutions. Default: 6rate +ratematrix = 6rate + +# (equal, empirical, estimate, fixed) Specifies how the equilibrium +# state frequencies (A, C, G and T) are treated. The empirical setting +# fixes the frequencies at their observed proportions, and the other +# options should be self-explanatory. Default: estimate +statefrequencies = estimate + +# (none, gamma, gammafixed) – The model of rate heterogeneity +# assumed.
“gammafixed” requires that the alpha shape parameter is +# provided, and a setting of “gamma” estimates it. Default: gamma +ratehetmodel = gamma + +# (1 to 20) – The number of categories of variable rates (not +# including the invariant site class if it is being used). Must be set +# to 1 if ratehetmodel is set to none. Note that runtimes and memory +# usage scale linearly with this setting. Default: 4 +numratecats = 4 + +# (none, estimate, fixed) Specifies whether a parameter representing +# the proportion of sites that are unable to change (i.e. have a +# substitution rate of zero) will be included. This is typically +# referred to as “invariant sites”, but would better be termed +# “invariable sites”. Default: estimate +invariantsites = estimate + +# ---------------------------------------------------------------------- + +[master] +nindivs = 4 +holdover = 1 +selectionintensity = .5 +holdoverpenalty = 0 + +# The maximum number of generations to run. Note that this supersedes +# the automated stopping criterion (see enforcetermconditions above), +# and should therefore be set to a very large value if automatic +# termination is desired. +stopgen = 1000000 + +# The maximum number of seconds for the run to continue. Note that +# this supersedes the automated stopping criterion (see +# enforcetermconditions above), and should therefore be set to a very +# large value if automatic termination is desired. +stoptime = {stoptime} + +startoptprec = 0.5 +minoptprec = 0.01 +numberofprecreductions = 20 +treerejectionthreshold = 50.0 +topoweight = 1.0 +modweight = 0.05 +brlenweight = 0.2 +randnniweight = 0.1 +randsprweight = 0.3 +limsprweight = 0.6 +intervallength = 100 +intervalstostore = 5 + +limsprrange = 6 +meanbrlenmuts = 5 +gammashapebrlen = 1000 +gammashapemodel = 1000 +uniqueswapbias = 0.1 +distanceswapbias = 1.0 +""" + +# ---------------------------------------------------------------------- + +def get_output_dir(working_dir: Path, prefix: str, new_output: bool): + if not new_output and (output_dirs := list(working_dir.glob(prefix + "*"))): + output_dir = max(output_dirs) + else: + output_dir = working_dir.joinpath(prefix + datetime.datetime.now().strftime('%Y-%m%d-%H%M%S')) + output_dir.mkdir() + output_dir.chmod(0o777) + return output_dir + +# ====================================================================== + +try: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("subdir") + parser.add_argument("-n", "--num-runs", type=int, default=16) + parser.add_argument("-s", "--stoptime", default=60*60, type=int, help="termination condition: the maximum number of seconds for the run to continue") + parser.add_argument("-t", "--start-tree", default=None, type=Path) + parser.add_argument("-f", "--source-fasta", default=None, type=Path) + parser.add_argument("--new-output", action="store_true", default=False) + parser.add_argument("--node", dest="slurm_nodelist", default="i22", help="one node to run on, to run on multiple nodes use --exclude-nodes") + parser.add_argument("--exclude-nodes", dest="slurm_exclude_nodes", default=None) + args = parser.parse_args() + logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(asctime)s: %(message)s") + exit_code = main(args) or 0 +except Exception as err: + logging.error(f"> {err}\n{traceback.format_exc()}") + exit_code = 1 +exit(exit_code) + +# ====================================================================== diff --git a/proj/weekly-tree/init b/proj/weekly-tree/init index 8939b6e..a8dc0f1 100755 --- 
a/proj/weekly-tree/init +++ b/proj/weekly-tree/init @@ -31,11 +31,11 @@ def init_subtype(subtype: str, previous_dir: Path): subtype_dir = TODAY_DIR.joinpath(subtype) subtype_dir.mkdir(exist_ok=True) if not (previous_newick := subtype_dir.joinpath("previous.newick")).exists(): - previous_raxml_best = previous_dir.joinpath(subtype, "raxml-best.txt") - if not previous_raxml_best.exists(): - print(f">> no previous raxml best run report: {previous_raxml_best}", file=sys.stderr) + previous_tree = previous_dir.joinpath(subtype, "cmaple_tree.txt") + if not previous_tree.exists(): + print(f">> no previous tree run report: {previous_tree}", file=sys.stderr) return - previous_newick_link_target = Path(previous_raxml_best.open().read().strip()) + previous_newick_link_target = Path(previous_tree.open().read().strip()) link = relative_path(previous_newick_link_target, subtype_dir) print(f">>> {previous_newick} -> {link}", file=sys.stderr) previous_newick.symlink_to(link) diff --git a/proj/weekly-tree/init-h3 b/proj/weekly-tree/init-h3 new file mode 100755 index 0000000..36d521e --- /dev/null +++ b/proj/weekly-tree/init-h3 @@ -0,0 +1,93 @@ +#! /usr/bin/env python3 +import sys, os, datetime, subprocess +from pathlib import Path + +WEEKLY_TREE_ROOT_DIR = Path("/syn/eu/ac/results/trees") +TODAY = datetime.date.today().strftime("%Y-%m%d") +TODAY_DIR = WEEKLY_TREE_ROOT_DIR.joinpath(TODAY) +SUBTYPES = ["h3"] + +CMD_DIR = Path(sys.argv[0]).parent +EXPORT_CMD = CMD_DIR.joinpath("export") +UPDATE_TREE_CMD = CMD_DIR.joinpath("update-tree") + +# ====================================================================== + +def main(): + previous_dir = find_previous_dir() + print(f">>> previous dir: {previous_dir}", file=sys.stderr) + init_today_dir(previous_dir) + +# ---------------------------------------------------------------------- + +def init_today_dir(previous_dir: Path): + TODAY_DIR.mkdir(exist_ok=True) + for subtype in SUBTYPES: + init_subtype(subtype, previous_dir=previous_dir) + +# ---------------------------------------------------------------------- + +def init_subtype(subtype: str, previous_dir: Path): + subtype_dir = TODAY_DIR.joinpath(subtype) + subtype_dir.mkdir(exist_ok=True) + if not (previous_newick := subtype_dir.joinpath("previous.newick")).exists(): + previous_raxml_best = previous_dir.joinpath(subtype, "raxml-best.txt") + if not previous_raxml_best.exists(): + print(f">> no previous raxml best run report: {previous_raxml_best}", file=sys.stderr) + return + previous_newick_link_target = Path(previous_raxml_best.open().read().strip()) + link = relative_path(previous_newick_link_target, subtype_dir) + print(f">>> {previous_newick} -> {link}", file=sys.stderr) + previous_newick.symlink_to(link) + + if not (source_fas := subtype_dir.joinpath("source.fas")).exists(): + export_source_fas(subtype=subtype, source_fas=source_fas, previous_dir=previous_dir) + + if not (source_newick := subtype_dir.joinpath("source.newick")).exists(): + update_source_newick(subtype=subtype, source_newick=source_newick, previous_dir=previous_dir) + +# ---------------------------------------------------------------------- + +def update_source_newick(subtype: str, source_newick: Path, previous_dir: Path): + print(f">>> updating {source_newick}", file=sys.stderr) + log = subprocess.check_output([UPDATE_TREE_CMD, subtype], cwd=source_newick.parents[1]).decode("utf-8") + source_newick.with_name("update-tree.log").open("w").write(log) + print(log) + +# ---------------------------------------------------------------------- + +def 
diff --git a/proj/weekly-tree/init-h3 b/proj/weekly-tree/init-h3
new file mode 100755
index 0000000..36d521e
--- /dev/null
+++ b/proj/weekly-tree/init-h3
@@ -0,0 +1,93 @@
#! /usr/bin/env python3
import sys, os, datetime, subprocess
from pathlib import Path

WEEKLY_TREE_ROOT_DIR = Path("/syn/eu/ac/results/trees")
TODAY = datetime.date.today().strftime("%Y-%m%d")
TODAY_DIR = WEEKLY_TREE_ROOT_DIR.joinpath(TODAY)
SUBTYPES = ["h3"]

CMD_DIR = Path(sys.argv[0]).parent
EXPORT_CMD = CMD_DIR.joinpath("export")
UPDATE_TREE_CMD = CMD_DIR.joinpath("update-tree")

# ======================================================================

def main():
    previous_dir = find_previous_dir()
    print(f">>> previous dir: {previous_dir}", file=sys.stderr)
    init_today_dir(previous_dir)

# ----------------------------------------------------------------------

def init_today_dir(previous_dir: Path):
    TODAY_DIR.mkdir(exist_ok=True)
    for subtype in SUBTYPES:
        init_subtype(subtype, previous_dir=previous_dir)

# ----------------------------------------------------------------------

def init_subtype(subtype: str, previous_dir: Path):
    subtype_dir = TODAY_DIR.joinpath(subtype)
    subtype_dir.mkdir(exist_ok=True)
    if not (previous_newick := subtype_dir.joinpath("previous.newick")).exists():
        previous_raxml_best = previous_dir.joinpath(subtype, "raxml-best.txt")
        if not previous_raxml_best.exists():
            print(f">> no previous raxml best run report: {previous_raxml_best}", file=sys.stderr)
            return
        previous_newick_link_target = Path(previous_raxml_best.open().read().strip())
        link = relative_path(previous_newick_link_target, subtype_dir)
        print(f">>> {previous_newick} -> {link}", file=sys.stderr)
        previous_newick.symlink_to(link)

    if not (source_fas := subtype_dir.joinpath("source.fas")).exists():
        export_source_fas(subtype=subtype, source_fas=source_fas, previous_dir=previous_dir)

    if not (source_newick := subtype_dir.joinpath("source.newick")).exists():
        update_source_newick(subtype=subtype, source_newick=source_newick, previous_dir=previous_dir)

# ----------------------------------------------------------------------

def update_source_newick(subtype: str, source_newick: Path, previous_dir: Path):
    print(f">>> updating {source_newick}", file=sys.stderr)
    log = subprocess.check_output([UPDATE_TREE_CMD, subtype], cwd=source_newick.parents[1]).decode("utf-8")
    source_newick.with_name("update-tree.log").open("w").write(log)
    print(log)

# ----------------------------------------------------------------------

def export_source_fas(subtype: str, source_fas: Path, previous_dir: Path):
    print(f">>> exporting {source_fas}", file=sys.stderr)
    subprocess.check_call([EXPORT_CMD, subtype], cwd=source_fas.parents[1])
    subprocess.check_call(["fasta-size", source_fas, previous_dir.joinpath(subtype, source_fas.name)])

# ----------------------------------------------------------------------

def relative_path(target: Path, source_dir: Path):
    # print(target.parts, source_dir.parts)
    for initial_parts in range(min(len(target.parts), len(source_dir.parts))):
        # print(initial_parts, target.parts[initial_parts], source_dir.parts[initial_parts], target.parts[initial_parts] == source_dir.parts[initial_parts])
        if target.parts[initial_parts] != source_dir.parts[initial_parts]:
            break
    # print(initial_parts, target.parts[initial_parts:], source_dir.parts[initial_parts:])
    return Path(*([".."] * (len(source_dir.parts) - initial_parts)), *target.parts[initial_parts:])

# ----------------------------------------------------------------------

def find_previous_dir():
    dirs = list(WEEKLY_TREE_ROOT_DIR.glob("2*"))
    found = None
    use = len(dirs) - 1
    while not found and use >= 0:
        use_date = dirs[use].stem[:9]
        if use_date != TODAY:
            found = dirs[use]
        use -= 1
    if not found:
        dirs_s = "\n  ".join(str(dd) for dd in dirs)
        raise RuntimeError(f"previous dir not found in\n  {dirs_s}")
    return found

# ======================================================================

main()
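`relative_path()` above walks past the shared path prefix, then climbs out of `source_dir` with one `..` per remaining component and descends into `target`; `os.path.relpath` computes the same string and can serve as a cross-check. A hypothetical example (paths follow the layout used in this patch but are invented):

import os
from pathlib import Path

target = Path("/syn/eu/ac/results/trees/2024-0101/h3/cmaple.2024-0101-120000/000.treefile")   # hypothetical
source_dir = Path("/syn/eu/ac/results/trees/2024-0108/h3")                                    # hypothetical
# relative_path(target, source_dir) and os.path.relpath agree here:
print(os.path.relpath(target, source_dir))
# -> ../../2024-0101/h3/cmaple.2024-0101-120000/000.treefile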
diff --git a/proj/weekly-tree/make-cmaple b/proj/weekly-tree/make-cmaple
new file mode 100755
index 0000000..3509efe
--- /dev/null
+++ b/proj/weekly-tree/make-cmaple
@@ -0,0 +1,143 @@
#! /usr/bin/env python3

import sys, os, re, argparse, logging, datetime, concurrent.futures, subprocess, random, traceback
from pathlib import Path

# ======================================================================

sSubtypeNorm = {"BV": "BVIC", "BVIC": "BVIC", "BY": "BYAM", "BYAM": "BYAM", "A(H1N1)": "A(H1N1)", "H1": "A(H1N1)", "1": "A(H1N1)", "A(H3N2)": "A(H3N2)", "H3": "A(H3N2)", "3": "A(H3N2)"}
sSubdirforSubtype = {"BVIC": "bvic", "BYAM": "byam", "A(H1N1)": "h1", "A(H3N2)": "h3"}

sRandomGen = random.SystemRandom()

sMailTo = "weekly-tree@antigenic-cartography.org"

# ----------------------------------------------------------------------

def main(args: argparse.Namespace):
    argv = '"' + '" "'.join(sys.argv) + '"'
    working_dir = Path(args.subdir).resolve()
    source_fas = working_dir.joinpath("source.fas")
    # source_tree = working_dir.joinpath("source.newick")  # no source tree, we make it from scratch each time
    outgroup = source_fas.open().readline().strip()[1:]
    try:
        best_tree_file = cmaple(working_dir=working_dir, source_fas=source_fas, outgroup=outgroup, num_runs=args.num_runs, slurm_nodelist=args.slurm_nodelist, slurm_exclude_nodes=args.slurm_exclude_nodes)
        subprocess.run(["/usr/bin/mail", "-s", f"completed {sys.argv[0]} {working_dir}", sMailTo], input=f"cd {working_dir}\n{argv}\n{best_tree_file}\n", text=True)
        logging.info(f"mail to {sMailTo} about completion sent")
        return 0
    except Exception as err:
        logging.error(f"{err}", exc_info=True)
        subprocess.run(["/usr/bin/mail", "-s", f"FAILED {sys.argv[0]} {working_dir}", sMailTo], input=f"cd {working_dir}\n{argv}\n\n{traceback.format_exc()}\n", text=True)
        logging.info(f"mail to {sMailTo} about FAILURE sent")
        raise

# ----------------------------------------------------------------------

def cmaple(working_dir: Path, source_fas: Path, outgroup: str, slurm_nodelist: str, slurm_exclude_nodes: str, num_runs: int):

    output_dir = get_output_dir(working_dir, "cmaple.")

    cmaple_cmd = [
        "/syn/bin/cmaple",
        "-aln", str(source_fas),
        "-st", "DNA",
        "-m", "GTR",
        "--search", "EXHAUSTIVE",
        "--min-blength", "0.0001",
        "-nt", "1",
        "--overwrite"
    ]

    # ----------------------------------------------------------------------

    def prefix(run_id):
        return f"{output_dir.joinpath(run_id)}"

    def run_cmaple(run_id):
        cmd = (srun_cmd()
               + [f"--job-name=vcm_cmaple {working_dir.name} {run_id}", f"--output={output_dir.joinpath(run_id).with_suffix('.stdout')}", f"--error={output_dir.joinpath(run_id).with_suffix('.stderr')}"]
               + cmaple_cmd
               + ["--prefix", prefix(run_id), "--seed", str(sRandomGen.randint(1, 0xFFFFFFF))])
        with output_dir.joinpath("commands.txt").open("a") as commands_txt:
            print(" ".join(cmd), file=commands_txt)
        # print(cmd)
        subprocess.check_call(cmd)

    def srun_cmd():
        cmd = [
            "srun",
            "--cpus-per-task=2",
            "--ntasks=1",
            "--threads=1",
            f"--nodelist={slurm_nodelist}",
        ]
        if slurm_exclude_nodes:
            cmd.append(f"--exclude={slurm_exclude_nodes}")
        return cmd

    # ----------------------------------------------------------------------

    def submit():
        failures = 0
        logging.info(f"submitting {num_runs} jobs")
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_runs) as executor:
            future_to_runid = {executor.submit(run_cmaple, run_id): run_id for run_id in (f"{ri:03d}" for ri in range(num_runs))}
            for future in concurrent.futures.as_completed(future_to_runid):
                try:
                    future.result()
                except Exception as err:
                    logging.error(f"cmaple {working_dir} {future_to_runid[future]} FAILED: {err}")
                    failures += 1
                else:
                    logging.info(f"cmaple run {working_dir} {future_to_runid[future]} completed")
        if failures:
            logging.error(f"cmaple {working_dir} FAILED: {failures} runs failed")
        else:
            logging.info("cmaple completed")

    # ----------------------------------------------------------------------

    def results_available():
        return len(list(output_dir.glob("*000.treefile"))) > 0

    tree_file = output_dir.joinpath("000.treefile")

    if not results_available():
        start = datetime.datetime.now()
        submit()
        logging.info(f"cmaple elapsed: {datetime.datetime.now() - start}")

    output_dir.parent.joinpath("cmaple_tree.txt").open("w").write(str(tree_file))

    return tree_file

# ----------------------------------------------------------------------

def get_output_dir(working_dir, prefix):
    output_dirs = list(working_dir.glob(prefix + "*"))
    if output_dirs:
        output_dir = max(output_dirs)
    else:
        output_dir = working_dir.joinpath(prefix + datetime.datetime.now().strftime('%Y-%m%d-%H%M%S'))
        output_dir.mkdir()
        output_dir.chmod(0o777)
    return output_dir

# ======================================================================

try:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("subdir")
    # parser.add_argument("outgroup")
    parser.add_argument("-n", "--num-runs", dest="num_runs", type=int, default=1)
    parser.add_argument("--node", dest="slurm_nodelist", default="i22")
    parser.add_argument("--exclude-nodes", dest="slurm_exclude_nodes", default=None)
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG, format="%(levelname)s %(asctime)s: %(message)s")
    exit_code = main(args) or 0
except Exception as err:
    logging.error(f"> {err}\n{traceback.format_exc()}")
    exit_code = 1
exit(exit_code)

# ======================================================================
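`submit()` above fans the CMAPLE runs out over Slurm with a thread pool, one blocking `srun` per run, and counts failures rather than aborting on the first one. A stripped-down, self-contained sketch of that pattern, with toy commands rather than the script's own `srun` lines:

import concurrent.futures, subprocess

def fan_out(commands: list[list[str]]) -> int:
    # run all commands concurrently; return how many exited non-zero
    failures = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(commands)) as executor:
        future_to_cmd = {executor.submit(subprocess.check_call, cmd): cmd for cmd in commands}
        for future in concurrent.futures.as_completed(future_to_cmd):
            try:
                future.result()
            except Exception:
                failures += 1
    return failures

# fan_out([["true"], ["false"]]) == 1 on any POSIX system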
diff --git a/proj/weekly-tree/pdf b/proj/weekly-tree/pdf
index eacaa2f..f8d1fa5 100755
--- a/proj/weekly-tree/pdf
+++ b/proj/weekly-tree/pdf
@@ -18,7 +18,7 @@ if [[ ${HIDB_SUBTYPE:0:1} == "b" ]]; then HIDB_SUBTYPE=b; fi
 # ----------------------------------------------------------------------------------------------------
 if [[ ! -f ${SUBTYPE}/${SUBTYPE}.tjz ]]; then
-    ${AE_ROOT}/bin/tree-to-json --populate ${SUBTYPE} --ladderize --print-cumulative 20 --remove-if-cumulative-more-than ${CUMUL[${SUBTYPE}]} $(cat ${SUBTYPE}/garli-best.txt) ${SUBTYPE}/${SUBTYPE}.tjz
+    ${AE_ROOT}/bin/tree-to-json --populate ${SUBTYPE} --ladderize --print-cumulative 20 --remove-if-cumulative-more-than ${CUMUL[${SUBTYPE}]} $(cat ${SUBTYPE}/cmaple_tree-rot-trim-col.txt) ${SUBTYPE}/${SUBTYPE}.tjz
 fi
 if [[ ! -f ${SUBTYPE}/${SUBTYPE}.pdf ]]; then
     tal -s ${TAL} ${SUBTYPE}/${SUBTYPE}.tjz ${SUBTYPE}/${SUBTYPE}.txt ${SUBTYPE}/${SUBTYPE}.pdf
@@ -30,7 +30,7 @@ if [[ ${HIDB_SUBTYPE:0:1} == "h" ]]; then
     ASR_INFIX=".asr"
     if [[ ! -f ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.tjz ]]; then
         ASR_STATES=$(cat ${SUBTYPE}/raxml-asr-best.txt | sed 's/ancestralTree/ancestralStates/')
-        ${AE_ROOT}/bin/tree-to-json --populate ${SUBTYPE} --asr-tree $(cat ${SUBTYPE}/raxml-asr-best.txt) --asr-states ${ASR_STATES} --ladderize --print-cumulative 20 --remove-if-cumulative-more-than ${CUMUL[${SUBTYPE}]} $(cat ${SUBTYPE}/garli-best.txt) ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.tjz
+        ${AE_ROOT}/bin/tree-to-json --populate ${SUBTYPE} --asr-tree $(cat ${SUBTYPE}/raxml-asr-best.txt) --asr-states ${ASR_STATES} --ladderize --print-cumulative 20 --remove-if-cumulative-more-than ${CUMUL[${SUBTYPE}]} $(cat ${SUBTYPE}/cmaple_tree-rot-trim-col.txt) ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.tjz
     fi
     if [[ ! -f ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.pdf ]]; then
         tal -s ${TAL} -D aa-transitions-method=imported ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.tjz ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.txt ${SUBTYPE}/${SUBTYPE}${ASR_INFIX}.pdf
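The pointer file consumed above comes from a chain of post-processing steps, each appending a suffix before the extension: `cmaple_tree.txt` -> `cmaple_tree-rot.txt` (rotate) -> `cmaple_tree-rot-trim.txt` (trim) -> `cmaple_tree-rot-trim-col.txt` (collapse), matching the `submit` script below. A sketch of the naming rule, assuming the suffixing is the entire convention:

from pathlib import Path

def chained(pointer: Path, step: str) -> Path:
    # insert "-<step>" between the stem and the extension
    return pointer.with_name(pointer.stem + "-" + step + pointer.suffix)

# chained(Path("h3/cmaple_tree-rot-trim.txt"), "col") -> Path("h3/cmaple_tree-rot-trim-col.txt")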
diff --git a/proj/weekly-tree/raxml-asr b/proj/weekly-tree/raxml-asr
index 8baa059..6dea16c 100755
--- a/proj/weekly-tree/raxml-asr
+++ b/proj/weekly-tree/raxml-asr
@@ -10,8 +10,9 @@ sRandomGen = random.SystemRandom()
 def main(args: argparse.Namespace):
     argv = '"' + '" "'.join(sys.argv) + '"'
     working_dir = Path(args.subdir).resolve()
-    source_fas = working_dir.joinpath("source.fas")
-    source_tree = Path(working_dir.joinpath("raxml-best.txt").open().read().strip()).resolve()
+    source_fas = working_dir.joinpath("source-trim.fas")
+    source_tree = Path(working_dir.joinpath("cmaple_tree-rot-trim.txt").open().read().strip()).resolve()
+
     if os.fork():
         # parent
         exit(0)
@@ -30,13 +31,25 @@ def raxml_ng_asr(working_dir: Path, source_fas: Path, source_tree: Path, slurm_n
 
     output_dir = get_output_dir(working_dir, "raxml-ng-asr.")
 
+    source_tree_with_min_branch_lengths = source_tree.parent.joinpath("000_minbl.treefile")
+    bl_script_path = os.environ["AE_ROOT"] + "/R/set_min_branch_lengths.R"
+
+    set_min_bl_cmd = [
+        bl_script_path,
+        str(source_tree.resolve()),
+        str(source_tree_with_min_branch_lengths.resolve()),
+        "0.0001"
+    ]
+
+    # ----------------------------------------------------------------------
+
     raxml_cmd = [
         "/syn/bin/raxml-ng",
         "--ancestral",
         "--model", "GTR+G+I",  # raxml: -m GTRGAMMAI -c 4
         "--msa", str(source_fas),
         "--msa-format", "FASTA",
-        "--tree", str(source_tree),
+        "--tree", str(source_tree_with_min_branch_lengths),
         "--log", "PROGRESS",  # VERBOSE, DEBUG
         "--threads", "1",
     ]
@@ -53,6 +66,7 @@ def raxml_ng_asr(working_dir: Path, source_fas: Path, source_tree: Path, slurm_n
         with output_dir.joinpath("commands.txt").open("a") as commands_txt:
             print(" ".join(cmd), file=commands_txt)
             # print(cmd)
+        subprocess.call(set_min_bl_cmd)
         subprocess.check_call(cmd)
 
     def srun_cmd():
diff --git a/proj/weekly-tree/submit b/proj/weekly-tree/submit
index 4cb9be3..4c5fd51 100755
--- a/proj/weekly-tree/submit
+++ b/proj/weekly-tree/submit
@@ -1,3 +1,5 @@
+
+
 #! /usr/bin/env bash
 set -o errexit -o errtrace -o pipefail -o nounset # -o xtrace
 
@@ -6,13 +8,15 @@ submit_tree()
     NODE="$1"
     SUBTYPE="$2"
     (
-        ${AE_ROOT}/proj/weekly-tree/raxml --node ${NODE} ${SUBTYPE} &&
+        ${AE_ROOT}/proj/weekly-tree/make-cmaple --node ${NODE} ${SUBTYPE} &&
+        ${AE_ROOT}/R/rotate.R ${SUBTYPE}/cmaple_tree.txt ${SUBTYPE}/source.fas &&
+        ${AE_ROOT}/R/tree_trim.R ${SUBTYPE} ${SUBTYPE}/cmaple_tree-rot.txt ${SUBTYPE}/source.fas &&
         ${AE_ROOT}/proj/weekly-tree/raxml-asr --node ${NODE} ${SUBTYPE} &&
-        ${AE_ROOT}/proj/weekly-tree/garli --node ${NODE} ${SUBTYPE} &&
+        ${AE_ROOT}/R/collapse_short_branches.R ${SUBTYPE}/cmaple_tree-rot-trim.txt 0.00005 &&
         ${AE_ROOT}/proj/weekly-tree/pdf ${SUBTYPE}
     ) >${SUBTYPE}/submit.log 2>&1 &
 }
 
 submit_tree i20 h1
 submit_tree i22 h3
-submit_tree i19 bvic
+submit_tree i18 bvic
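One detail worth noting in the raxml-asr hunk above: the branch-length fix-up is launched with `subprocess.call`, which ignores a non-zero exit status, so a failure of `set_min_branch_lengths.R` would only surface later when raxml-ng cannot read the tree. A stricter variant would raise immediately; a sketch with hypothetical paths, not the patch's own code:

import subprocess

set_min_bl_cmd = [
    "R/set_min_branch_lengths.R",               # hypothetical relative path for illustration
    "cmaple.2024-0101-120000/000.treefile",     # hypothetical input tree
    "cmaple.2024-0101-120000/000_minbl.treefile",
    "0.0001",
]
subprocess.check_call(set_min_bl_cmd)  # unlike subprocess.call(), raises CalledProcessError on failure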
diff --git a/proj/weekly-tree/submit-memvar b/proj/weekly-tree/submit-memvar
new file mode 100755
index 0000000..1a2c593
--- /dev/null
+++ b/proj/weekly-tree/submit-memvar
@@ -0,0 +1,18 @@
#! /usr/bin/env bash
set -o errexit -o errtrace -o pipefail -o nounset # -o xtrace

submit_tree()
{
    NODE="$1"
    SUBTYPE="$2"
    (
        ${AE_ROOT}/proj/weekly-tree/raxml --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/raxml-asr --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/garli-memvar --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/pdf ${SUBTYPE}
    ) >${SUBTYPE}/submit.log 2>&1 &
}

# submit_tree i21 h1
# submit_tree i22 h3
submit_tree i22 bvic
diff --git a/proj/weekly-tree/submit-new b/proj/weekly-tree/submit-new
new file mode 100755
index 0000000..69dd730
--- /dev/null
+++ b/proj/weekly-tree/submit-new
@@ -0,0 +1,18 @@
#! /usr/bin/env bash
set -o errexit -o errtrace -o pipefail -o nounset # -o xtrace

submit_tree()
{
    NODE="$1"
    SUBTYPE="$2"
    (
        ${AE_ROOT}/proj/weekly-tree/raxml --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/raxml-asr --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/garli-h3 --node ${NODE} ${SUBTYPE} &&
        ${AE_ROOT}/proj/weekly-tree/pdf ${SUBTYPE}
    ) >${SUBTYPE}/submit.log 2>&1 &
}

submit_tree i20 h1 #i21
submit_tree i22 h3
submit_tree i19 bvic