diff --git a/.clang-format b/.clang-format index 548733d..5f8f5ca 100644 --- a/.clang-format +++ b/.clang-format @@ -148,5 +148,3 @@ StatementMacros: TabWidth: 8 UseTab: Never ... - - diff --git a/CMakeLists.txt b/CMakeLists.txt index c8bed87..327d025 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,7 @@ if (UNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-missing-braces -Wno-unknown-attributes -Wno-unused-function") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror -Wno-missing-braces -Wno-unknown-attributes -Wno-unused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") if (SSHASH_USE_SANITIZERS) @@ -63,7 +63,6 @@ set(SSHASH_SOURCES src/dictionary.cpp src/query.cpp src/info.cpp - src/statistics.cpp ) set(SSHASH_INCLUDE_DIRS diff --git a/README.md b/README.md index f61d430..5294829 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7772316.svg)](https://doi.org/10.5281/zenodo.7772316) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7239205.svg)](https://doi.org/10.5281/zenodo.7239205) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17582116.svg)](https://doi.org/10.5281/zenodo.17582116) @@ -24,8 +25,8 @@ The data structure is described in the following papers: For a dictionary of n k-mers, two basic queries are supported: -- i = **Lookup**(g), where i is in [0,n) if the k-mer g is found in the dictionary or i = -1 otherwise; -- g = **Access**(i), where g is the k-mer associated to the identifier i. +- i = **Lookup**(x), where i is in [0,n) if the k-mer x is found in the dictionary or i = -1 otherwise; +- x = **Access**(i), where x is the k-mer associated to the identifier i. 
If also the weights of the k-mers (their frequency counts) are stored in the dictionary, then the dictionary is said to be *weighted* and it also supports: @@ -36,9 +37,9 @@ Other supported queries are: - **Membership Queries**: determine if a given k-mer is present in the dictionary or not. - **Streaming Queries**: stream through all k-mers of a given DNA file (.fasta or .fastq formats) to determine their membership to the dictionary. -- **Navigational Queries**: given a k-mer g[1..k] determine if g[2..k]+x is present (forward neighbourhood) and if x+g[1..k-1] is present (backward neighbourhood), for x = A, C, G, T ('+' here means string concatenation). -SSHash internally stores a set of strings, called *contigs* in the following, each associated to a distinct identifier. -If a contig identifier is specified for a navigational query (rather than a k-mer), then the backward neighbourhood of the first k-mer and the forward neighbourhood of the last k-mer in the contig are returned. +- **Navigational Queries**: given a k-mer x[1..k] determine if x[2..k]+c is present (forward neighbourhood) and if c+x[1..k-1] is present (backward neighbourhood), for c in {A,C,G,T} ('+' here means string concatenation). +SSHash internally stores a set of strings, each associated to a distinct identifier. +If a string identifier is specified for a navigational query (rather than a k-mer), then the backward neighbourhood of the first k-mer and the forward neighbourhood of the last k-mer in the string are returned. If you are interested in a **membership-only** version of SSHash, have a look at [SSHash-Lite](https://github.com/jermp/sshash-lite). It also works for input files with duplicate k-mers (e.g., [matchtigs](https://github.com/algbio/matchtigs) [4]). For a query sequence S and a given coverage threshold E in [0,1], the sequence is considered to be present in the dictionary if at least E*(|S|-k+1) of the k-mers of S are positive. 
@@ -76,6 +77,8 @@ To compile the code for a release environment (see file `CMakeLists.txt` for the cmake .. make -j +**NOTE**: For best performance on `x86` architectures, the option `-D SSHASH_USE_ARCH_NATIVE` can be specified as well. + For a testing environment, use the following instead: mkdir debug_build @@ -142,18 +145,6 @@ Tools and Usage There is one executable called `sshash` after the compilation, which can be used to run a tool. Run `./sshash` as follows to see a list of available tools. - == SSHash: (S)parse and (S)kew (Hash)ing of k-mers ========================= - - Usage: ./sshash ... - - Available tools: - build build a dictionary - query query a dictionary - check check correctness of a dictionary - bench run performance tests for a dictionary - permute permute a weighted input file - compute-statistics compute index statistics - For large-scale indexing, it could be necessary to increase the number of file descriptors that can be opened simultaneously: ulimit -n 2048 @@ -179,15 +170,15 @@ such collections of stitched unitigs can be obtained from raw FASTA files. ### Example 1 - ./sshash build -i ../data/unitigs_stitched/salmonella_enterica_k31_ust.fa.gz -k 31 -m 13 --check --bench -o salmonella_enterica.index + ./sshash build -i ../data/unitigs_stitched/salmonella_enterica_k31_ust.fa.gz -k 31 -m 13 --check --bench -o salmonella_enterica.sshash This example builds a dictionary for the k-mers read from the file `../data/unitigs_stitched/salmonella_enterica_k31_ust.fa.gz`, -with k = 31 and m = 13. It also check the correctness of the dictionary (`--check` option), run a performance benchmark (`--bench` option), and serializes the index on disk to the file `salmonella_enterica.index`. +with k = 31 and m = 13. It also checks the correctness of the dictionary (`--check` option), runs a performance benchmark (`--bench` option), and serializes the index on disk to the file `salmonella_enterica.sshash`. 
To run a performance benchmark after construction of the index, use: - ./sshash bench -i salmonella_enterica.index + ./sshash bench -i salmonella_enterica.sshash To also store the weights, use the option `--weighted`: @@ -195,34 +186,34 @@ To also store the weights, use the option `--weighted`: ### Example 2 - ./sshash build -i ../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz -k 31 -m 15 -l 2 -o salmonella_100.index + ./sshash build -i ../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz -k 31 -m 15 -o salmonella_100.sshash -This example builds a dictionary from the input file `../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz` (a pangenome consisting in 100 genomes of *Salmonella Enterica*), with k = 31, m = 15, and l = 2. It also serializes the index on disk to the file `salmonella_100.index`. +This example builds a dictionary from the input file `../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz` (a pangenome consisting of 100 genomes of *Salmonella Enterica*), with k = 31 and m = 15. It also serializes the index on disk to the file `salmonella_100.sshash`. To perform some streaming membership queries, use: - ./sshash query -i salmonella_100.index -q ../data/queries/SRR5833294.10K.fastq.gz + ./sshash query -i salmonella_100.sshash -q ../data/queries/SRR5833294.10K.fastq.gz if your queries are meant to be read from a FASTQ file, or - ./sshash query -i salmonella_100.index -q ../data/queries/salmonella_enterica.fasta.gz --multiline + ./sshash query -i salmonella_100.sshash -q ../data/queries/salmonella_enterica.fasta.gz --multiline if your queries are to be read from a (multi-line) FASTA file. 
### Example 3 - ./sshash build -i ../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz -k 31 -m 13 -l 4 -s 347692 --canonical -o salmonella_100.canon.index + ./sshash build -i ../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz -k 31 -m 13 --canonical -o salmonella_100.canon.sshash -This example builds a dictionary from the input file `../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz` (same used in Example 2), with k = 31, m = 13, l = 4, using a seed 347692 for construction (`-s 347692`), and with the canonical parsing modality (option `--canonical`). The dictionary is serialized on disk to the file `salmonella_100.canon.index`. +This example builds a dictionary from the input file `../data/unitigs_stitched/salmonella_100_k31_ust.fa.gz` (same used in Example 2), with k = 31, m = 13, and with the canonical parsing modality (option `--canonical`). The dictionary is serialized on disk to the file `salmonella_100.canon.sshash`. -The "canonical" version of the dictionary offers more speed for only a little space increase (for a suitable choice of parameters m and l), especially under low-hit workloads -- when the majority of k-mers are not found in the dictionary. (For all details, refer to the paper.) +The "canonical" version of the dictionary offers more speed for only a little space increase, especially under low-hit workloads -- when the majority of k-mers are not found in the dictionary. (For all details, refer to the paper.) Below a comparison between the dictionary built in Example 2 (not canonical) and the one just built (Example 3, canonical). 
- ./sshash query -i salmonella_100.index -q ../data/queries/SRR5833294.10K.fastq.gz + ./sshash query -i salmonella_100.sshash -q ../data/queries/SRR5833294.10K.fastq.gz - ./sshash query -i salmonella_100.canon.index -q ../data/queries/SRR5833294.10K.fastq.gz + ./sshash query -i salmonella_100.canon.sshash -q ../data/queries/SRR5833294.10K.fastq.gz Both queries should originate the following report (reported here for reference): @@ -262,33 +253,24 @@ Input Files SSHash is meant to index k-mers from collections that **do not contain duplicates nor invalid k-mers** (strings containing symbols different from {A,C,G,T}). -These collections can be obtained, for example, by extracting the maximal unitigs of a de Bruijn graph. - -To do so, we can use the tool [BCALM2](https://github.com/GATB/bcalm). -This tool builds a compacted de Bruijn graph and outputs its maximal unitigs. -From the output of BCALM2, we can then *stitch* (i.e., glue) some unitigs to reduce the number of nucleotides. The stitiching process is carried out using the [UST](https://github.com/jermp/UST) tool. +These collections can be obtained, for example, by extracting the maximal unitigs of a de Bruijn graph, or eulertigs, using the [GGCAT](https://github.com/algbio/ggcat) algorithm. **NOTE**: Input files are expected to have **one DNA sequence per line**. If a sequence spans multiple lines (e.g., multi-fasta), the lines should be concatenated before indexing. -Below we provide a complete example (assuming both BCALM2 and UST are installed correctly) that downloads the Human (GRCh38) Chromosome 13 and extracts the maximal stitiched unitigs for k = 31. 
- - mkdir DNA_datasets - wget http://ftp.ensembl.org/pub/current_fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz -O DNA_datasets/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz - ~/bcalm/build/bcalm -in ~/DNA_datasets/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz -kmer-size 31 -abundance-min 1 -nb-cores 8 - ~/UST/ust -k 31 -i ~/Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa - gzip Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa.ust.fa - rm ~/Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa - #### Datasets -The script `scripts/download_and_preprocess_datasets.sh` +The script `scripts/download_and_preprocess_datasets.sh` of [this release](https://github.com/jermp/sshash/releases/tag/v3.0.0) contains all the needed steps to download and pre-process the datasets that we used in [1]. -For the experiments in [2] and [3], we used the datasets available on [Zenodo](https://doi.org/10.5281/zenodo.7772316). +For the experiments in [2] and [3], we used the datasets available at [https://doi.org/10.5281/zenodo.7772316](https://doi.org/10.5281/zenodo.7772316). + +For the latest benchmarks maintained in [this other repository](https://github.com/jermp/kmer_sets_benchmark) +we used the datasets described at [https://zenodo.org/records/17582116](https://zenodo.org/records/17582116). #### Weights -Using the option `-all-abundance-counts` of BCALM2, it is possible to also include the abundance counts of the k-mers in the BCALM2 output. Then, use the option `-a 1` of UST to include such counts in the stitched unitigs. + +Using the option `-all-abundance-counts` of [BCALM2](https://github.com/GATB/bcalm), it is possible to also include the abundance counts of the k-mers in the BCALM2 output. Then, use the option `-a 1` of [UST](https://github.com/jermp/UST) to include such counts in the stitched unitigs. 
Create a New Release -------------------- diff --git a/benchmarks/README.md b/benchmarks/README.md index 277af03..f0780e3 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,34 +1,29 @@ -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7239205.svg)](https://doi.org/10.5281/zenodo.7239205) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17582116.svg)](https://doi.org/10.5281/zenodo.17582116) Benchmarks ---------- -For these benchmarks we used the whole genomes of the following organisms: +For these benchmarks we used the datasets available here +[https://zenodo.org/records/17582116](https://zenodo.org/records/17582116). -- Gadus Morhua ("Cod") -- Falco Tinnunculus ("Kestrel") -- Homo Sapiens ("Human") - -for k = 31 and 63. +To run the benchmarks, from within the `build` directory, run -The datasets and queries used in these benchmarks can be downloaded -by running the script + python3 ../script/build.py <prefix> + python3 ../script/bench.py <prefix> + python3 ../script/streaming-query-high-hit.py <prefix> -``` -bash download-datasets.sh -``` +where `<prefix>` should be replaced by a suitable basename, e.g., the current date. -To run the benchmarks, from within the `build` directory, run +These are the results obtained on 10/11/25 (see logs [here](results-10-11-25)) +on a machine equipped with an AMD Ryzen Threadripper PRO 7985WX processor clocked at 5.40GHz. +The code was compiled with `gcc` 13.3.0. -``` -bash ../script/build.sh [prefix] -bash ../script/bench.sh [prefix] -bash ../script/streaming-query-high-hit.sh [prefix] -bash ../script/streaming-query-low-hit.sh [prefix] -``` +The indexes were built with a max RAM usage of 16 GB and 64 threads. +Queries were run using one thread, instead. -where `[prefix]` should be replaced by a suitable basename, e.g., the current date. +![](results-10-11-25/results.png) -These are the results obtained on 22/08/25 (see logs [here](results-22-08-25)). 
+The results can be exported to CSV format with -![](results-22-08-25/results.png) + python3 ../script/print_csv.py ../benchmarks/results-10-11-25/k31 + python3 ../script/print_csv.py ../benchmarks/results-10-11-25/k63 diff --git a/benchmarks/download-datasets.sh b/benchmarks/download-datasets.sh deleted file mode 100644 index 3892936..0000000 --- a/benchmarks/download-datasets.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# inputs to index -wget https://zenodo.org/records/7239205/files/cod.k31.unitigs.fa.ust.fa.gz -wget https://zenodo.org/records/7239205/files/cod.k63.unitigs.fa.ust.fa.gz -wget https://zenodo.org/records/7239205/files/kestrel.k31.unitigs.fa.ust.fa.gz -wget https://zenodo.org/records/7239205/files/kestrel.k63.unitigs.fa.ust.fa.gz -wget https://zenodo.org/records/7239205/files/human.k31.unitigs.fa.ust.fa.gz -wget https://zenodo.org/records/7239205/files/human.k63.unitigs.fa.ust.fa.gz - -# queries -wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR583/004/SRR5833294/SRR5833294.fastq.gz -wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR128/049/SRR12858649/SRR12858649.fastq.gz -wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR114/043/SRR11449743/SRR11449743_1.fastq.gz -wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR801/SRR801268/SRR801268_1.fastq.gz -wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR590/005/SRR5901135/SRR5901135_1.fastq.gz diff --git a/benchmarks/print_csv.py b/benchmarks/print_csv.py new file mode 100644 index 0000000..6e60816 --- /dev/null +++ b/benchmarks/print_csv.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 + +import sys +import json +import os +from statistics import mean, StatisticsError +import math + +def format_time(microseconds): + seconds = microseconds / 1_000_000 + minutes = int(seconds // 60) + seconds = int(seconds % 60) + return f"{minutes}:{seconds:02d}" + +def parse_build_file(path, canonical_flag): + """Parse build JSONL file.""" + results = [] + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + 
d = json.loads(line) + except json.JSONDecodeError: + print(f"Skipping invalid JSON line in {path}", file=sys.stderr) + continue + + num_kmers = int(d["num_kmers"]) + index_bytes = int(d["index_size_in_bytes"]) + build_time_us = int(d["total_build_time_in_microsec"]) + + bits_per_kmer = (index_bytes * 8) / num_kmers + gb = index_bytes / 1e9 + build_time_fmt = format_time(build_time_us) + + fname = os.path.basename(d["input_filename"]) + collection = fname.split(".")[0].capitalize() + k = d["k"] + + results.append({ + "k": k, + "Collection": collection, + "m": d["m"], + "canonical": "yes" if canonical_flag else "no", + "bits_per_kmer": f"{bits_per_kmer:.2f}", + "total_GB": f"{gb:.2f}", + "build_time": build_time_fmt + }) + return results + +def parse_bench_file(path, canonical_flag): + """Parse benchmark JSONL file and average per collection.""" + lookup_data = {} + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + d = json.loads(line) + except json.JSONDecodeError: + print(f"Skipping invalid JSON line in {path}", file=sys.stderr) + continue + + fname = os.path.basename(d["index_filename"]) + collection = fname.split(".")[0].capitalize() + m = d["m"] + k = d["k"] + canonical = "yes" if canonical_flag else "no" + + key = (collection, m, canonical) + entry = lookup_data.setdefault(key, { + "k": k, + "pos": [], "neg": [], "access": [], "iter": [] + }) + entry["pos"].append(float(d["positive lookup (avg_nanosec_per_kmer)"])) + entry["neg"].append(float(d["negative lookup (avg_nanosec_per_kmer)"])) + entry["access"].append(float(d["access (avg_nanosec_per_kmer)"])) + entry["iter"].append(float(d["iterator (avg_nanosec_per_kmer)"])) + + # average the results + for k, v in lookup_data.items(): + try: + lookup_data[k] = { + "k": v["k"], + "pos": f"{mean(v['pos'])/1000:.2f}", + "neg": f"{mean(v['neg'])/1000:.2f}", + "access": f"{mean(v['access'])/1000:.2f}", + "iter": f"{mean(v['iter']):.2f}", + } + except StatisticsError: + 
lookup_data[k] = {"k": v["k"], "pos": "NA", "neg": "NA", "access": "NA", "iter": "NA"} + return lookup_data + + +def parse_streaming_file(path, canonical_flag): + """Parse streaming queries JSON file.""" + stream_data = {} + if not os.path.exists(path): + return stream_data + + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + d = json.loads(line) + except json.JSONDecodeError: + print(f"Skipping invalid JSON line in {path}", file=sys.stderr) + continue + + fname = os.path.basename(d["index_filename"]) + collection = fname.split(".")[0].capitalize() + canonical = "yes" if canonical_flag else "no" + + key = (collection, canonical) + num_kmers = int(d["num_kmers"]) + num_pos = int(d["num_positive_kmers"]) + num_ext = int(d["num_extensions"]) + elapsed_ms = int(d["elapsed_millisec"]) + + ns_per_kmer = int(math.ceil(elapsed_ms * 1e6 / num_kmers)) + hit_rate = (num_pos / num_kmers) * 100 if num_kmers else 0 + extension_rate = (num_ext / num_pos) * 100 if num_pos else 0 + + stream_data[key] = { + "ns_per_kmer": f"{ns_per_kmer}", + "hit_rate": f"{hit_rate:.2f}", + "extension_rate": f"{extension_rate:.2f}" + } + return stream_data + + +def main(): + if len(sys.argv) != 2: + print("Usage: print.py input_dir", file=sys.stderr) + sys.exit(1) + + input_dir = sys.argv[1] + reg_build_path = input_dir + "/regular-build.json" + canon_build_path = input_dir + "/canon-build.json" + reg_bench_path = input_dir + "/regular-bench.json" + canon_bench_path = input_dir + "/canon-bench.json" + reg_stream_path = input_dir + "/regular-streaming-queries-high-hit.json" + canon_stream_path = input_dir + "/canon-streaming-queries-high-hit.json" + + reg_build = parse_build_file(reg_build_path, False) + canon_build = parse_build_file(canon_build_path, True) + reg_bench = parse_bench_file(reg_bench_path, False) + canon_bench = parse_bench_file(canon_bench_path, True) + reg_stream = parse_streaming_file(reg_stream_path, False) + canon_stream = 
parse_streaming_file(canon_stream_path, True) + + # merge everything + all_builds = reg_build + canon_build + lookup_all = {**reg_bench, **canon_bench} + stream_all = {**reg_stream, **canon_stream} + + # CSV header + print("k,Collection,m,canonical,bits_per_kmer,total_GB,build_time,positive_lookup_ns,negative_lookup_ns,access_ns,iteration_ns,ns_per_kmer,hit_rate,extension_rate") + + for r in sorted(all_builds, key=lambda x: (int(x["k"]), x["Collection"], x["canonical"])): + lookup = lookup_all.get( + (r["Collection"], r["m"], r["canonical"]), # key + {"pos": "NA", "neg": "NA", "access": "NA", "iter": "NA", "k": r["k"]}) + stream = stream_all.get( + (r["Collection"], r["canonical"]), # key + {"ns_per_kmer": "NA", "hit_rate": "NA", "extension_rate": "NA"}) + + print(f"{r['k']},{r['Collection']},{r['m']},{r['canonical']},{r['bits_per_kmer']},{r['total_GB']},{r['build_time']},{lookup['pos']},{lookup['neg']},{lookup['access']},{lookup['iter']},{stream['ns_per_kmer']},{stream['hit_rate']},{stream['extension_rate']}") + +if __name__ == "__main__": + main() diff --git a/benchmarks/results-10-11-25/k31/canon-bench.json b/benchmarks/results-10-11-25/k31/canon-bench.json new file mode 100644 index 0000000..b0d06f8 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-bench.json @@ -0,0 +1,27 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "437.640303", "negative lookup (avg_nanosec_per_kmer)": "368.735412", "access (avg_nanosec_per_kmer)": "275.079856", "iterator (avg_nanosec_per_kmer)": "2.499007"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "454.028618", "negative lookup (avg_nanosec_per_kmer)": "371.439905", "access (avg_nanosec_per_kmer)": "277.998081", "iterator (avg_nanosec_per_kmer)": "2.557738"} +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "439.052926", "negative lookup (avg_nanosec_per_kmer)": "364.553518", "access (avg_nanosec_per_kmer)": "278.494305", "iterator (avg_nanosec_per_kmer)": "2.542095"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "442.814541", "negative lookup (avg_nanosec_per_kmer)": "394.080458", "access (avg_nanosec_per_kmer)": "277.356330", "iterator (avg_nanosec_per_kmer)": "2.551238"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "438.393034", "negative lookup (avg_nanosec_per_kmer)": "397.279347", "access (avg_nanosec_per_kmer)": "276.140528", "iterator (avg_nanosec_per_kmer)": "2.578091"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "k": "31", "m": "20", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "444.467988", "negative lookup (avg_nanosec_per_kmer)": "395.111831", "access (avg_nanosec_per_kmer)": "275.241797", "iterator (avg_nanosec_per_kmer)": "2.516939"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "607.905103", "negative lookup (avg_nanosec_per_kmer)": "424.479846", "access (avg_nanosec_per_kmer)": "351.518291", "iterator (avg_nanosec_per_kmer)": "2.595285"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "604.119962", "negative lookup (avg_nanosec_per_kmer)": "424.261657", "access (avg_nanosec_per_kmer)": "352.599607", "iterator (avg_nanosec_per_kmer)": "2.513324"} +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "614.129887", "negative lookup (avg_nanosec_per_kmer)": "424.319141", "access (avg_nanosec_per_kmer)": "351.839341", "iterator (avg_nanosec_per_kmer)": "2.570409"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "964.312976", "negative lookup (avg_nanosec_per_kmer)": "590.478722", "access (avg_nanosec_per_kmer)": "878.720946", "iterator (avg_nanosec_per_kmer)": "2.500330"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "965.664933", "negative lookup (avg_nanosec_per_kmer)": "600.547251", "access (avg_nanosec_per_kmer)": "880.682088", "iterator (avg_nanosec_per_kmer)": "2.529999"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "962.638510", "negative lookup (avg_nanosec_per_kmer)": "592.798034", "access (avg_nanosec_per_kmer)": "887.008503", "iterator (avg_nanosec_per_kmer)": "2.522038"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "804.471276", "negative lookup (avg_nanosec_per_kmer)": "464.980411", "access (avg_nanosec_per_kmer)": "538.081542", "iterator (avg_nanosec_per_kmer)": "2.542060"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "799.616952", "negative lookup (avg_nanosec_per_kmer)": "456.482477", "access (avg_nanosec_per_kmer)": "534.852676", "iterator (avg_nanosec_per_kmer)": "2.502930"} +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "802.152088", "negative lookup (avg_nanosec_per_kmer)": "461.817978", "access (avg_nanosec_per_kmer)": "549.113821", "iterator (avg_nanosec_per_kmer)": "2.537954"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "787.872622", "negative lookup (avg_nanosec_per_kmer)": "411.104772", "access (avg_nanosec_per_kmer)": "384.002101", "iterator (avg_nanosec_per_kmer)": "2.553101"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "798.311633", "negative lookup (avg_nanosec_per_kmer)": "410.851839", "access (avg_nanosec_per_kmer)": "388.069527", "iterator (avg_nanosec_per_kmer)": "2.513800"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "789.019697", "negative lookup (avg_nanosec_per_kmer)": "411.631504", "access (avg_nanosec_per_kmer)": "386.654014", "iterator (avg_nanosec_per_kmer)": "2.509549"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "692.848571", "negative lookup (avg_nanosec_per_kmer)": "400.421416", "access (avg_nanosec_per_kmer)": "365.651520", "iterator (avg_nanosec_per_kmer)": "2.470192"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "699.180468", "negative lookup (avg_nanosec_per_kmer)": "399.504994", "access (avg_nanosec_per_kmer)": "361.013311", "iterator (avg_nanosec_per_kmer)": "2.458981"} +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "703.840309", "negative lookup (avg_nanosec_per_kmer)": "406.306273", "access (avg_nanosec_per_kmer)": "360.361614", "iterator (avg_nanosec_per_kmer)": "2.460564"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "406.652311", "negative lookup (avg_nanosec_per_kmer)": "354.570676", "access (avg_nanosec_per_kmer)": "261.789137", "iterator (avg_nanosec_per_kmer)": "2.543308"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "408.743713", "negative lookup (avg_nanosec_per_kmer)": "356.006719", "access (avg_nanosec_per_kmer)": "260.019499", "iterator (avg_nanosec_per_kmer)": "2.557492"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "k": "31", "m": "19", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "407.029910", "negative lookup (avg_nanosec_per_kmer)": "358.602815", "access (avg_nanosec_per_kmer)": "260.672789", "iterator (avg_nanosec_per_kmer)": "2.553014"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "749.156047", "negative lookup (avg_nanosec_per_kmer)": "552.016283", "access (avg_nanosec_per_kmer)": "679.750339", "iterator (avg_nanosec_per_kmer)": "2.528279"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "750.656001", "negative lookup (avg_nanosec_per_kmer)": "550.621588", "access (avg_nanosec_per_kmer)": "675.161470", "iterator (avg_nanosec_per_kmer)": "2.515422"} 
+{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash", "k": "31", "m": "21", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "749.104199", "negative lookup (avg_nanosec_per_kmer)": "556.790046", "access (avg_nanosec_per_kmer)": "683.967747", "iterator (avg_nanosec_per_kmer)": "2.602998"} diff --git a/benchmarks/results-10-11-25/k31/canon-bench.log b/benchmarks/results-10-11-25/k31/canon-bench.log new file mode 100644 index 0000000..e40f300 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-bench.log @@ -0,0 +1,135 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 437.64 +negative lookup (avg_nanosec_per_kmer) 368.735 +access (avg_nanosec_per_kmer) = 275.08 +iterator (avg_nanosec_per_kmer) = 2.49901 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 454.029 +negative lookup (avg_nanosec_per_kmer) 371.44 +access (avg_nanosec_per_kmer) = 277.998 +iterator (avg_nanosec_per_kmer) = 2.55774 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 439.053 +negative lookup (avg_nanosec_per_kmer) 364.554 +access (avg_nanosec_per_kmer) = 278.494 +iterator (avg_nanosec_per_kmer) = 2.5421 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 442.815 +negative lookup (avg_nanosec_per_kmer) 394.08 +access (avg_nanosec_per_kmer) = 277.356 +iterator (avg_nanosec_per_kmer) = 2.55124 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 438.393 +negative lookup (avg_nanosec_per_kmer) 397.279 +access (avg_nanosec_per_kmer) = 276.141 +iterator (avg_nanosec_per_kmer) = 2.57809 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 444.468 
+negative lookup (avg_nanosec_per_kmer) 395.112 +access (avg_nanosec_per_kmer) = 275.242 +iterator (avg_nanosec_per_kmer) = 2.51694 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 607.905 +negative lookup (avg_nanosec_per_kmer) 424.48 +access (avg_nanosec_per_kmer) = 351.518 +iterator (avg_nanosec_per_kmer) = 2.59528 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 604.12 +negative lookup (avg_nanosec_per_kmer) 424.262 +access (avg_nanosec_per_kmer) = 352.6 +iterator (avg_nanosec_per_kmer) = 2.51332 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 614.13 +negative lookup (avg_nanosec_per_kmer) 424.319 +access (avg_nanosec_per_kmer) = 351.839 +iterator (avg_nanosec_per_kmer) = 2.57041 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 964.313 +negative lookup (avg_nanosec_per_kmer) 590.479 +access (avg_nanosec_per_kmer) = 878.721 +iterator (avg_nanosec_per_kmer) = 2.50033 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 965.665 +negative lookup (avg_nanosec_per_kmer) 600.547 +access (avg_nanosec_per_kmer) = 880.682 +iterator (avg_nanosec_per_kmer) = 2.53 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 962.639 +negative lookup (avg_nanosec_per_kmer) 592.798 +access (avg_nanosec_per_kmer) = 887.009 +iterator (avg_nanosec_per_kmer) = 2.52204 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 804.471 +negative lookup (avg_nanosec_per_kmer) 464.98 +access (avg_nanosec_per_kmer) = 538.082 +iterator (avg_nanosec_per_kmer) = 2.54206 +./sshash bench -i 
/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 799.617 +negative lookup (avg_nanosec_per_kmer) 456.482 +access (avg_nanosec_per_kmer) = 534.853 +iterator (avg_nanosec_per_kmer) = 2.50293 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 802.152 +negative lookup (avg_nanosec_per_kmer) 461.818 +access (avg_nanosec_per_kmer) = 549.114 +iterator (avg_nanosec_per_kmer) = 2.53795 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 787.873 +negative lookup (avg_nanosec_per_kmer) 411.105 +access (avg_nanosec_per_kmer) = 384.002 +iterator (avg_nanosec_per_kmer) = 2.5531 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 798.312 +negative lookup (avg_nanosec_per_kmer) 410.852 +access (avg_nanosec_per_kmer) = 388.07 +iterator (avg_nanosec_per_kmer) = 2.5138 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 789.02 +negative lookup (avg_nanosec_per_kmer) 411.632 +access (avg_nanosec_per_kmer) = 386.654 +iterator (avg_nanosec_per_kmer) = 2.50955 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 692.849 +negative lookup (avg_nanosec_per_kmer) 400.421 +access (avg_nanosec_per_kmer) = 365.652 +iterator (avg_nanosec_per_kmer) = 2.47019 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 699.18 +negative lookup (avg_nanosec_per_kmer) 399.505 +access (avg_nanosec_per_kmer) = 361.013 +iterator (avg_nanosec_per_kmer) = 2.45898 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 703.84 +negative lookup (avg_nanosec_per_kmer) 406.306 +access (avg_nanosec_per_kmer) = 360.362 +iterator 
(avg_nanosec_per_kmer) = 2.46056 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 406.652 +negative lookup (avg_nanosec_per_kmer) 354.571 +access (avg_nanosec_per_kmer) = 261.789 +iterator (avg_nanosec_per_kmer) = 2.54331 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 408.744 +negative lookup (avg_nanosec_per_kmer) 356.007 +access (avg_nanosec_per_kmer) = 260.019 +iterator (avg_nanosec_per_kmer) = 2.55749 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 407.03 +negative lookup (avg_nanosec_per_kmer) 358.603 +access (avg_nanosec_per_kmer) = 260.673 +iterator (avg_nanosec_per_kmer) = 2.55301 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 749.156 +negative lookup (avg_nanosec_per_kmer) 552.016 +access (avg_nanosec_per_kmer) = 679.75 +iterator (avg_nanosec_per_kmer) = 2.52828 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 750.656 +negative lookup (avg_nanosec_per_kmer) 550.622 +access (avg_nanosec_per_kmer) = 675.161 +iterator (avg_nanosec_per_kmer) = 2.51542 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 749.104 +negative lookup (avg_nanosec_per_kmer) 556.79 +access (avg_nanosec_per_kmer) = 683.968 +iterator (avg_nanosec_per_kmer) = 2.603 diff --git a/benchmarks/results-10-11-25/k31/canon-build.json b/benchmarks/results-10-11-25/k31/canon-build.json new file mode 100644 index 0000000..80ecb73 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-build.json @@ -0,0 +1,9 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz", "k": "31", "m": "20", "canonical": "true", 
"seed": "1", "num_threads": "64", "step 1 (encode strings)": "1677776", "step 2 (compute minimizer tuples)": "742988", "step 3 (merging minimizer tuples)": "5069909", "step 4 (build mphf)": "3674066", "step 5 (replacing minimizer values with MPHF hashes)": "3524940", "step 6 (merging minimizers tuples)": "7408673", "step 7.1 (build sparse index)": "1242493", "step 7.2 (build skew index)": "3339054", "step 7 (build sparse and skew index)": "4806763", "total_build_time_in_microsec": "26905115", "index_size_in_bytes": "566181081", "num_kmers": "502465200"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz", "k": "31", "m": "20", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "3213367", "step 2 (compute minimizer tuples)": "1310902", "step 3 (merging minimizer tuples)": "11197918", "step 4 (build mphf)": "8514381", "step 5 (replacing minimizer values with MPHF hashes)": "7990571", "step 6 (merging minimizers tuples)": "31178206", "step 7.1 (build sparse index)": "1981005", "step 7.2 (build skew index)": "1058644", "step 7 (build sparse and skew index)": "3496165", "total_build_time_in_microsec": "66901510", "index_size_in_bytes": "1246156075", "num_kmers": "1150399205"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "7741285", "step 2 (compute minimizer tuples)": "3132330", "step 3 (merging minimizer tuples)": "57806636", "step 4 (build mphf)": "18672185", "step 5 (replacing minimizer values with MPHF hashes)": "18575641", "step 6 (merging minimizers tuples)": "67851902", "step 7.1 (build sparse index)": "6579041", "step 7.2 (build skew index)": "8938319", "step 7 (build sparse and skew index)": "16725239", "total_build_time_in_microsec": "190505218", "index_size_in_bytes": "3135788878", "num_kmers": "2505678680"} +{"input_filename": 
"/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "74469166", "step 2 (compute minimizer tuples)": "132929365", "step 3 (merging minimizer tuples)": "311498555", "step 4 (build mphf)": "271387025", "step 5 (replacing minimizer values with MPHF hashes)": "283505437", "step 6 (merging minimizers tuples)": "409148589", "step 7.1 (build sparse index)": "72560865", "step 7.2 (build skew index)": "56665151", "step 7 (build sparse and skew index)": "138006592", "total_build_time_in_microsec": "1620944729", "index_size_in_bytes": "26027317465", "num_kmers": "17987935180"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "16860289", "step 2 (compute minimizer tuples)": "9072178", "step 3 (merging minimizer tuples)": "60514444", "step 4 (build mphf)": "23944043", "step 5 (replacing minimizer values with MPHF hashes)": "26552714", "step 6 (merging minimizers tuples)": "114259532", "step 7.1 (build sparse index)": "15751012", "step 7.2 (build skew index)": "16348994", "step 7 (build sparse and skew index)": "34070557", "total_build_time_in_microsec": "285273757", "index_size_in_bytes": "5544019788", "num_kmers": "3718120949"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "5744102", "step 2 (compute minimizer tuples)": "1562408", "step 3 (merging minimizer tuples)": "13432774", "step 4 (build mphf)": "7287468", "step 5 (replacing minimizer values with MPHF hashes)": "8407509", "step 6 (merging minimizers tuples)": "32332023", "step 7.1 (build sparse index)": "4999557", "step 7.2 (build skew index)": "6457378", "step 7 (build sparse and skew index)": "12097558", "total_build_time_in_microsec": 
"80863842", "index_size_in_bytes": "1600485113", "num_kmers": "1111018845"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "4578309", "step 2 (compute minimizer tuples)": "1359941", "step 3 (merging minimizer tuples)": "10683665", "step 4 (build mphf)": "5988675", "step 5 (replacing minimizer values with MPHF hashes)": "6756195", "step 6 (merging minimizers tuples)": "29021973", "step 7.1 (build sparse index)": "4071384", "step 7.2 (build skew index)": "3781315", "step 7 (build sparse and skew index)": "8383331", "total_build_time_in_microsec": "66772089", "index_size_in_bytes": "1287008500", "num_kmers": "894310084"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1113573", "step 2 (compute minimizer tuples)": "620769", "step 3 (merging minimizer tuples)": "3511262", "step 4 (build mphf)": "2761131", "step 5 (replacing minimizer values with MPHF hashes)": "2495039", "step 6 (merging minimizers tuples)": "5206076", "step 7.1 (build sparse index)": "960968", "step 7.2 (build skew index)": "39044", "step 7 (build sparse and skew index)": "1164370", "total_build_time_in_microsec": "16872220", "index_size_in_bytes": "398891769", "num_kmers": "376205185"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "38018674", "step 2 (compute minimizer tuples)": "40881264", "step 3 (merging minimizer tuples)": "208951298", "step 4 (build mphf)": "214322784", "step 5 (replacing minimizer values with MPHF hashes)": "186685394", "step 6 (merging minimizers tuples)": "273627254", "step 7.1 (build sparse index)": "38396190", "step 7.2 (build skew index)": "5832073", "step 7 
(build sparse and skew index)": "49839265", "total_build_time_in_microsec": "1012325933", "index_size_in_bytes": "16357630621", "num_kmers": "12319840464"} diff --git a/benchmarks/results-10-11-25/k31/canon-build.log b/benchmarks/results-10-11-25/k31/canon-build.log new file mode 100644 index 0000000..49e3fc3 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-build.log @@ -0,0 +1,2974 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash --canonical +2025-11-10 23:51:57: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... +read 1000000 sequences, 260758668 bases, 230758668 kmers +read 2000000 sequences, 549832064 bases, 489832064 kmers +read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +cost: 2.0 + 0.245658 [bits/kmer] +max string length = 31415 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 15 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.67778 [sec] (3.33909 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.7.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.26.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.45.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.63.bin'... 
+=== step 2 (compute minimizer tuples): 0.742988 [sec] (1.47869 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.bin' +=== step 3 (merging minimizer tuples): 5.06991 [sec] (10.0901 [ns/kmer]) +num_minimizers = 86163506 +num_minimizer_positions = 94463730 +num_super_kmers = 98209779 +building minimizers MPHF with 64 threads and 29 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.67407 [sec] (7.31208 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815117438038839.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.52494 [sec] (7.01529 [ns/kmer]) +=== step 6 (merging minimizers tuples): 7.40867 [sec] (14.7446 [ns/kmer]) +num_bits_per_offset = 30 +num_buckets_larger_than_1_not_in_skew_index 2988605/86163506 (3.46853%) +num_buckets_in_skew_index 5810/86163506 (0.00674299%) +max_bucket_size 68577 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 10013861/94463730 (10.6007%) +num_minimizer_positions_of_buckets_in_skew_index 1280778/94463730 (1.35584%) +=== step 7.1 (build sparse index): 1.24249 [sec] (2.47279 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 1643143 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1178275 + partition = 2: num kmers in buckets of size > 256 and <= 512: 956038 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 605533 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 425543 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 382297 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 254872 + partition = 7: num kmers in buckets of size > 8192 and <= 68577: 624986 +num kmers in skew index = 6070687 (1.20818%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1643143 + building MPHF with 64 threads and 1 partitions (avg. 
partition size = 1643143)... + built mphs[0] for 1643143 kmers; bits/key = 2.56038 + built positions[0] for 1643143 kmers; bits/key = 7.0002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1178275 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1178275)... + built mphs[1] for 1178275 kmers; bits/key = 2.41806 + built positions[1] for 1178275 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 956038 + building MPHF with 64 threads and 1 partitions (avg. partition size = 956038)... + built mphs[2] for 956038 kmers; bits/key = 2.56129 + built positions[2] for 956038 kmers; bits/key = 9.00035 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 605533 + building MPHF with 64 threads and 1 partitions (avg. partition size = 605533)... + built mphs[3] for 605533 kmers; bits/key = 2.41961 + built positions[3] for 605533 kmers; bits/key = 10.0006 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 425543 + building MPHF with 64 threads and 1 partitions (avg. partition size = 425543)... + built mphs[4] for 425543 kmers; bits/key = 2.42126 + built positions[4] for 425543 kmers; bits/key = 11.0009 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 382297 + building MPHF with 64 threads and 1 partitions (avg. partition size = 382297)... + built mphs[5] for 382297 kmers; bits/key = 2.42153 + built positions[5] for 382297 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 254872 + building MPHF with 64 threads and 1 partitions (avg. partition size = 254872)... + built mphs[6] for 254872 kmers; bits/key = 2.56712 + built positions[6] for 254872 kmers; bits/key = 13.0013 + lower = 8192; upper = 68577; num_bits_per_pos = 17; num_kmers_in_partition = 624986 + building MPHF with 64 threads and 1 partitions (avg. partition size = 624986)... 
+ built mphs[7] for 624986 kmers; bits/key = 2.41943 + built positions[7] for 624986 kmers; bits/key = 17.0005 +=== step 7.2 (build skew index): 3.33905 [sec] (6.64534 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.5247% +buckets with 2 minimizer positions = 2.16597% +buckets with 3 minimizer positions = 0.599947% +buckets with 4 minimizer positions = 0.247865% +buckets with 5 minimizer positions = 0.129293% +buckets with 6 minimizer positions = 0.0768388% +buckets with 7 minimizer positions = 0.0512653% +buckets with 8 minimizer positions = 0.0353688% +buckets with 9 minimizer positions = 0.0260667% +buckets with 10 minimizer positions = 0.0199121% +buckets with 11 minimizer positions = 0.0157805% +buckets with 12 minimizer positions = 0.0127513% +buckets with 13 minimizer positions = 0.0102503% +buckets with 14 minimizer positions = 0.00880535% +buckets with 15 minimizer positions = 0.0074347% +buckets with 16 minimizer positions = 0.00615574% +max_bucket_size = 68577 +=== step 7 (build sparse and skew index): 4.80676 [sec] (9.56636 [ns/kmer]) +=== total time: 26.9051 [sec] (53.5462 [ns/kmer]) +total index size: 566181081 [B] -- 566.181 [MB] +SPACE BREAKDOWN: + mphf: 0.487029 [bits/kmer] (2.84013 [bits/key]) -- 5.40276% + strings_offsets: 0.144419 [bits/kmer] -- 1.60208% + control_codewords: 5.31593 [bits/kmer] -- 58.9712% + mid_load_buckets: 0.597885 [bits/kmer] -- 6.63251% + begin_buckets_of_size: 4.26696e-06 [bits/kmer] -- 4.73347e-05% + strings: 2.24566 [bits/kmer] -- 24.9118% + skew_index: 0.223525 [bits/kmer] -- 2.47963% + weights: 2.92956e-06 [bits/kmer] -- 3.24984e-05% + -------------- + total: 9.01445 [bits/kmer] +2025-11-10 23:52:24: saving data structure to disk... 
+2025-11-10 23:52:27: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash --canonical +2025-11-10 23:52:27: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +cost: 2.0 + 0.0303995 [bits/kmer] +max string length = 111973 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 20 +=== step 1 (encode strings): 3.21337 [sec] (2.79326 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.3109 [sec] (1.13952 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 11.1979 [sec] (9.73394 [ns/kmer]) +num_minimizers = 209937048 +num_minimizer_positions = 213990360 +num_super_kmers = 222970482 +building minimizers MPHF with 64 threads and 70 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 8.51438 [sec] (7.40124 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815147476111131.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 7.99057 [sec] (6.94591 [ns/kmer]) +=== step 6 (merging minimizers tuples): 31.1782 [sec] (27.1021 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 2035626/209937048 (0.969636%) +num_buckets_in_skew_index 2517/209937048 (0.00119893%) +max_bucket_size 5316 +log2_max_bucket_size 13 +num_partitions in skew index 7 +num_minimizer_positions_of_buckets_larger_than_1 5736623/213990360 (2.68079%) +num_minimizer_positions_of_buckets_in_skew_index 354832/213990360 (0.165817%) +=== step 7.1 (build sparse index): 1.981 [sec] (1.72202 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 846431 + partition = 1: num kmers in buckets of size > 128 and <= 256: 463901 + partition = 2: num kmers in buckets of size > 256 and <= 512: 298968 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 116879 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 120430 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 42891 + partition = 6: num kmers in buckets of size > 4096 and <= 5316: 9813 +num kmers in skew index = 1899313 (0.1651%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 846431 + building MPHF with 64 threads and 1 partitions (avg. partition size = 846431)... + built mphs[0] for 846431 kmers; bits/key = 2.41876 + built positions[0] for 846431 kmers; bits/key = 7.00042 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 463901 + building MPHF with 64 threads and 1 partitions (avg. partition size = 463901)... + built mphs[1] for 463901 kmers; bits/key = 2.42059 + built positions[1] for 463901 kmers; bits/key = 8.00074 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 298968 + building MPHF with 64 threads and 1 partitions (avg. partition size = 298968)... 
+ built mphs[2] for 298968 kmers; bits/key = 2.42332 + built positions[2] for 298968 kmers; bits/key = 9.0012 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 116879 + building MPHF with 64 threads and 1 partitions (avg. partition size = 116879)... + built mphs[3] for 116879 kmers; bits/key = 2.57716 + built positions[3] for 116879 kmers; bits/key = 10.0031 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 120430 + building MPHF with 64 threads and 1 partitions (avg. partition size = 120430)... + built mphs[4] for 120430 kmers; bits/key = 2.43408 + built positions[4] for 120430 kmers; bits/key = 11.0027 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 42891 + building MPHF with 64 threads and 1 partitions (avg. partition size = 42891)... + built mphs[5] for 42891 kmers; bits/key = 2.46579 + built positions[5] for 42891 kmers; bits/key = 12.0089 + lower = 4096; upper = 5316; num_bits_per_pos = 13; num_kmers_in_partition = 9813 + building MPHF with 64 threads and 1 partitions (avg. partition size = 9813)... 
+ built mphs[6] for 9813 kmers; bits/key = 2.48487 + built positions[6] for 9813 kmers; bits/key = 13.0374 +=== step 7.2 (build skew index): 1.05864 [sec] (0.920241 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.0292% +buckets with 2 minimizer positions = 0.755504% +buckets with 3 minimizer positions = 0.0986639% +buckets with 4 minimizer positions = 0.0387669% +buckets with 5 minimizer positions = 0.0208944% +buckets with 6 minimizer positions = 0.0129548% +buckets with 7 minimizer positions = 0.00864497% +buckets with 8 minimizer positions = 0.00604515% +buckets with 9 minimizer positions = 0.00458137% +buckets with 10 minimizer positions = 0.00347628% +buckets with 11 minimizer positions = 0.00277321% +buckets with 12 minimizer positions = 0.00222448% +buckets with 13 minimizer positions = 0.00183484% +buckets with 14 minimizer positions = 0.00150569% +buckets with 15 minimizer positions = 0.00123418% +buckets with 16 minimizer positions = 0.00103317% +max_bucket_size = 5316 +=== step 7 (build sparse and skew index): 3.49616 [sec] (3.03909 [ns/kmer]) +=== total time: 66.9015 [sec] (58.155 [ns/kmer]) +total index size: 1246156075 [B] -- 1246.16 [MB] +SPACE BREAKDOWN: + mphf: 0.514173 [bits/kmer] (2.81753 [bits/key]) -- 5.93329% + strings_offsets: 0.100021 [bits/kmer] -- 1.15419% + control_codewords: 5.8397 [bits/kmer] -- 67.3871% + mid_load_buckets: 0.154586 [bits/kmer] -- 1.78384% + begin_buckets_of_size: 1.8637e-06 [bits/kmer] -- 2.15061e-05% + strings: 2.0304 [bits/kmer] -- 23.4298% + skew_index: 0.0270208 [bits/kmer] -- 0.311806% + weights: 1.27956e-06 [bits/kmer] -- 1.47654e-05% + -------------- + total: 8.6659 [bits/kmer] +2025-11-10 23:53:34: saving data structure to disk... 
+2025-11-10 23:53:40: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash --canonical +2025-11-10 23:53:40: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.74129 [sec] (3.0895 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.6.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.25.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.44.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.63.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.82.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.101.bin'... 
+=== step 2 (compute minimizer tuples): 3.13233 [sec] (1.25009 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 57.8066 [sec] (23.0703 [ns/kmer]) +num_minimizers = 462224926 +num_minimizer_positions = 511201278 +num_super_kmers = 531186741 +building minimizers MPHF with 64 threads and 155 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 18.6722 [sec] (7.45195 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 18.5756 [sec] (7.41342 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815220322184433.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 67.8519 [sec] (27.0793 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 14266506/462224926 (3.08649%) +num_buckets_in_skew_index 60557/462224926 (0.0131012%) +max_bucket_size 22085 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 53165974/511201278 (10.4002%) +num_minimizer_positions_of_buckets_in_skew_index 10137441/511201278 (1.98306%) +=== step 7.1 (build sparse index): 6.57904 [sec] (2.62565 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 14957205 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10906495 + partition = 2: num kmers in buckets of size > 
256 and <= 512: 7473094 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 4774535 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2638087 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1593261 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 729770 + partition = 7: num kmers in buckets of size > 8192 and <= 22085: 506148 +num kmers in skew index = 43578595 (1.73919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 14957205 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[0] for 14957205 kmers; bits/key = 2.56583 + built positions[0] for 14957205 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10906495 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10906495 kmers; bits/key = 2.61744 + built positions[1] for 10906495 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 7473094 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 7473094 kmers; bits/key = 2.65359 + built positions[2] for 7473094 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4774535 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 4774535 kmers; bits/key = 2.75085 + built positions[3] for 4774535 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2638087 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2638087)... + built mphs[4] for 2638087 kmers; bits/key = 2.55989 + built positions[4] for 2638087 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1593261 + building MPHF with 64 threads and 1 partitions (avg. 
partition size = 1593261)... + built mphs[5] for 1593261 kmers; bits/key = 2.56041 + built positions[5] for 1593261 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 729770 + building MPHF with 64 threads and 1 partitions (avg. partition size = 729770)... + built mphs[6] for 729770 kmers; bits/key = 2.56195 + built positions[6] for 729770 kmers; bits/key = 13.0005 + lower = 8192; upper = 22085; num_bits_per_pos = 15; num_kmers_in_partition = 506148 + building MPHF with 64 threads and 1 partitions (avg. partition size = 506148)... + built mphs[7] for 506148 kmers; bits/key = 2.42048 + built positions[7] for 506148 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 8.93832 [sec] (3.56722 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.9004% +buckets with 2 minimizer positions = 1.89323% +buckets with 3 minimizer positions = 0.481205% +buckets with 4 minimizer positions = 0.215066% +buckets with 5 minimizer positions = 0.121924% +buckets with 6 minimizer positions = 0.0781816% +buckets with 7 minimizer positions = 0.0541282% +buckets with 8 minimizer positions = 0.0394014% +buckets with 9 minimizer positions = 0.0296871% +buckets with 10 minimizer positions = 0.0233579% +buckets with 11 minimizer positions = 0.0188211% +buckets with 12 minimizer positions = 0.0152856% +buckets with 13 minimizer positions = 0.0127754% +buckets with 14 minimizer positions = 0.0106204% +buckets with 15 minimizer positions = 0.00907826% +buckets with 16 minimizer positions = 0.0079457% +max_bucket_size = 22085 +=== step 7 (build sparse and skew index): 16.7252 [sec] (6.67493 [ns/kmer]) +=== total time: 190.505 [sec] (76.0294 [ns/kmer]) +total index size: 3135788878 [B] -- 3135.79 [MB] +SPACE BREAKDOWN: + mphf: 0.523236 [bits/kmer] (2.83641 [bits/key]) -- 5.2262% + strings_offsets: 0.153147 [bits/kmer] -- 1.52966% + control_codewords: 6.08754 [bits/kmer] -- 60.8038% + mid_load_buckets: 
0.678982 [bits/kmer] -- 6.78183% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 8.54649e-06% + strings: 2.24545 [bits/kmer] -- 22.4281% + skew_index: 0.323422 [bits/kmer] -- 3.23041% + weights: 5.87466e-07 [bits/kmer] -- 5.86774e-06% + -------------- + total: 10.0118 [bits/kmer] +2025-11-10 23:56:50: saving data structure to disk... +2025-11-10 23:57:04: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash --canonical +2025-11-10 23:57:04: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz'... +read 1000000 sequences, 129490969 bases, 99490969 kmers +read 2000000 sequences, 259406781 bases, 199406781 kmers +read 3000000 sequences, 390247383 bases, 300247383 kmers +read 4000000 sequences, 521879077 bases, 401879077 kmers +read 5000000 sequences, 654383925 bases, 504383925 kmers +read 6000000 sequences, 787748040 bases, 607748040 kmers +read 7000000 sequences, 921978207 bases, 711978207 kmers +read 8000000 sequences, 1056572544 bases, 816572544 kmers +read 9000000 sequences, 1192074067 bases, 922074067 kmers +read 10000000 sequences, 1328585611 bases, 1028585611 kmers +read 11000000 sequences, 1466068193 bases, 1136068193 kmers +read 12000000 sequences, 1604189526 bases, 1244189526 kmers +read 13000000 sequences, 1743096378 bases, 1353096378 kmers +read 14000000 sequences, 1882900221 bases, 1462900221 kmers +read 15000000 sequences, 2023699884 bases, 1573699884 kmers +read 16000000 sequences, 2165862057 bases, 1685862057 kmers +read 17000000 sequences, 2309661964 bases, 1799661964 kmers +read 18000000 sequences, 2454684974 bases, 1914684974 kmers +read 19000000 sequences, 2601025569 bases, 2031025569 kmers +read 20000000 sequences, 2749275100 bases, 2149275100 kmers +read 21000000 sequences, 2898804831 bases, 2268804831 kmers +read 22000000 sequences, 
3049858713 bases, 2389858713 kmers +read 23000000 sequences, 3203100307 bases, 2513100307 kmers +read 24000000 sequences, 3357729055 bases, 2637729055 kmers +read 25000000 sequences, 3514381494 bases, 2764381494 kmers +read 26000000 sequences, 3673423010 bases, 2893423010 kmers +read 27000000 sequences, 3833876723 bases, 3023876723 kmers +read 28000000 sequences, 3997541031 bases, 3157541031 kmers +read 29000000 sequences, 4163233674 bases, 3293233674 kmers +read 30000000 sequences, 4331783042 bases, 3431783042 kmers +read 31000000 sequences, 4504035303 bases, 3574035303 kmers +read 32000000 sequences, 4679090399 bases, 3719090399 kmers +read 33000000 sequences, 4858436877 bases, 3868436877 kmers +read 34000000 sequences, 5041637740 bases, 4021637740 kmers +read 35000000 sequences, 5229976324 bases, 4179976324 kmers +read 36000000 sequences, 5423490866 bases, 4343490866 kmers +read 37000000 sequences, 5622911177 bases, 4512911177 kmers +read 38000000 sequences, 5828307256 bases, 4688307256 kmers +read 39000000 sequences, 6041904282 bases, 4871904282 kmers +read 40000000 sequences, 6264018359 bases, 5064018359 kmers +read 41000000 sequences, 6497175042 bases, 5267175042 kmers +read 42000000 sequences, 6743365189 bases, 5483365189 kmers +read 43000000 sequences, 7005667896 bases, 5715667896 kmers +read 44000000 sequences, 7289398126 bases, 5969398126 kmers +read 45000000 sequences, 7600998045 bases, 6250998045 kmers +read 46000000 sequences, 7846412811 bases, 6466412811 kmers +read 47000000 sequences, 7950267050 bases, 6540267050 kmers +read 48000000 sequences, 8054449490 bases, 6614449490 kmers +read 49000000 sequences, 8158503893 bases, 6688503893 kmers +read 50000000 sequences, 8262623298 bases, 6762623298 kmers +read 51000000 sequences, 8366491419 bases, 6836491419 kmers +read 52000000 sequences, 8470360694 bases, 6910360694 kmers +read 53000000 sequences, 8574554064 bases, 6984554064 kmers +read 54000000 sequences, 8678752038 bases, 7058752038 kmers +read 
55000000 sequences, 8783318704 bases, 7133318704 kmers +read 56000000 sequences, 8887348929 bases, 7207348929 kmers +read 57000000 sequences, 8991244574 bases, 7281244574 kmers +read 58000000 sequences, 9095727588 bases, 7355727588 kmers +read 59000000 sequences, 9200181084 bases, 7430181084 kmers +read 60000000 sequences, 9304538500 bases, 7504538500 kmers +read 61000000 sequences, 9409280029 bases, 7579280029 kmers +read 62000000 sequences, 9513939845 bases, 7653939845 kmers +read 63000000 sequences, 9618472370 bases, 7728472370 kmers +read 64000000 sequences, 9723234224 bases, 7803234224 kmers +read 65000000 sequences, 9827751954 bases, 7877751954 kmers +read 66000000 sequences, 9932020550 bases, 7952020550 kmers +read 67000000 sequences, 10036741400 bases, 8026741400 kmers +read 68000000 sequences, 10141739679 bases, 8101739679 kmers +read 69000000 sequences, 10246720968 bases, 8176720968 kmers +read 70000000 sequences, 10351924281 bases, 8251924281 kmers +read 71000000 sequences, 10456872741 bases, 8326872741 kmers +read 72000000 sequences, 10562347711 bases, 8402347711 kmers +read 73000000 sequences, 10667420487 bases, 8477420487 kmers +read 74000000 sequences, 10772671579 bases, 8552671579 kmers +read 75000000 sequences, 10877868233 bases, 8627868233 kmers +read 76000000 sequences, 10983492844 bases, 8703492844 kmers +read 77000000 sequences, 11089073361 bases, 8779073361 kmers +read 78000000 sequences, 11194470848 bases, 8854470848 kmers +read 79000000 sequences, 11300332559 bases, 8930332559 kmers +read 80000000 sequences, 11406269822 bases, 9006269822 kmers +read 81000000 sequences, 11512162907 bases, 9082162907 kmers +read 82000000 sequences, 11618219813 bases, 9158219813 kmers +read 83000000 sequences, 11724078742 bases, 9234078742 kmers +read 84000000 sequences, 11830014073 bases, 9310014073 kmers +read 85000000 sequences, 11935890790 bases, 9385890790 kmers +read 86000000 sequences, 12042232134 bases, 9462232134 kmers +read 87000000 sequences, 
12148382832 bases, 9538382832 kmers +read 88000000 sequences, 12254822411 bases, 9614822411 kmers +read 89000000 sequences, 12361563673 bases, 9691563673 kmers +read 90000000 sequences, 12468297930 bases, 9768297930 kmers +read 91000000 sequences, 12574912907 bases, 9844912907 kmers +read 92000000 sequences, 12681494065 bases, 9921494065 kmers +read 93000000 sequences, 12788194017 bases, 9998194017 kmers +read 94000000 sequences, 12894915381 bases, 10074915381 kmers +read 95000000 sequences, 13001521631 bases, 10151521631 kmers +read 96000000 sequences, 13109064835 bases, 10229064835 kmers +read 97000000 sequences, 13215873113 bases, 10305873113 kmers +read 98000000 sequences, 13323125980 bases, 10383125980 kmers +read 99000000 sequences, 13430215641 bases, 10460215641 kmers +read 100000000 sequences, 13537778344 bases, 10537778344 kmers +read 101000000 sequences, 13645148965 bases, 10615148965 kmers +read 102000000 sequences, 13752520277 bases, 10692520277 kmers +read 103000000 sequences, 13860127249 bases, 10770127249 kmers +read 104000000 sequences, 13968075956 bases, 10848075956 kmers +read 105000000 sequences, 14075861146 bases, 10925861146 kmers +read 106000000 sequences, 14184298738 bases, 11004298738 kmers +read 107000000 sequences, 14292530270 bases, 11082530270 kmers +read 108000000 sequences, 14400943968 bases, 11160943968 kmers +read 109000000 sequences, 14509339935 bases, 11239339935 kmers +read 110000000 sequences, 14617599335 bases, 11317599335 kmers +read 111000000 sequences, 14725842174 bases, 11395842174 kmers +read 112000000 sequences, 14834254989 bases, 11474254989 kmers +read 113000000 sequences, 14942804338 bases, 11552804338 kmers +read 114000000 sequences, 15051637733 bases, 11631637733 kmers +read 115000000 sequences, 15160739429 bases, 11710739429 kmers +read 116000000 sequences, 15269978487 bases, 11789978487 kmers +read 117000000 sequences, 15378990148 bases, 11868990148 kmers +read 118000000 sequences, 15488236558 bases, 11948236558 
kmers +read 119000000 sequences, 15598141514 bases, 12028141514 kmers +read 120000000 sequences, 15707567895 bases, 12107567895 kmers +read 121000000 sequences, 15817378418 bases, 12187378418 kmers +read 122000000 sequences, 15927205756 bases, 12267205756 kmers +read 123000000 sequences, 16037448540 bases, 12347448540 kmers +read 124000000 sequences, 16147936166 bases, 12427936166 kmers +read 125000000 sequences, 16258467588 bases, 12508467588 kmers +read 126000000 sequences, 16368795492 bases, 12588795492 kmers +read 127000000 sequences, 16479785214 bases, 12669785214 kmers +read 128000000 sequences, 16590282249 bases, 12750282249 kmers +read 129000000 sequences, 16701057677 bases, 12831057677 kmers +read 130000000 sequences, 16812553081 bases, 12912553081 kmers +read 131000000 sequences, 16923449047 bases, 12993449047 kmers +read 132000000 sequences, 17034230526 bases, 13074230526 kmers +read 133000000 sequences, 17145713815 bases, 13155713815 kmers +read 134000000 sequences, 17257389525 bases, 13237389525 kmers +read 135000000 sequences, 17369130838 bases, 13319130838 kmers +read 136000000 sequences, 17481314596 bases, 13401314596 kmers +read 137000000 sequences, 17593629072 bases, 13483629072 kmers +read 138000000 sequences, 17706229726 bases, 13566229726 kmers +read 139000000 sequences, 17818270886 bases, 13648270886 kmers +read 140000000 sequences, 17931420451 bases, 13731420451 kmers +read 141000000 sequences, 18044353871 bases, 13814353871 kmers +read 142000000 sequences, 18157903240 bases, 13897903240 kmers +read 143000000 sequences, 18271415292 bases, 13981415292 kmers +read 144000000 sequences, 18384555504 bases, 14064555504 kmers +read 145000000 sequences, 18498314118 bases, 14148314118 kmers +read 146000000 sequences, 18612140169 bases, 14232140169 kmers +read 147000000 sequences, 18726166960 bases, 14316166960 kmers +read 148000000 sequences, 18840350948 bases, 14400350948 kmers +read 149000000 sequences, 18954724883 bases, 14484724883 kmers +read 
150000000 sequences, 19069173447 bases, 14569173447 kmers +read 151000000 sequences, 19183793062 bases, 14653793062 kmers +read 152000000 sequences, 19298944468 bases, 14738944468 kmers +read 153000000 sequences, 19414656615 bases, 14824656615 kmers +read 154000000 sequences, 19530428728 bases, 14910428728 kmers +read 155000000 sequences, 19646614327 bases, 14996614327 kmers +read 156000000 sequences, 19762624488 bases, 15082624488 kmers +read 157000000 sequences, 19879115632 bases, 15169115632 kmers +read 158000000 sequences, 19995793294 bases, 15255793294 kmers +read 159000000 sequences, 20112771576 bases, 15342771576 kmers +read 160000000 sequences, 20230295571 bases, 15430295571 kmers +read 161000000 sequences, 20347943202 bases, 15517943202 kmers +read 162000000 sequences, 20465629376 bases, 15605629376 kmers +read 163000000 sequences, 20583555678 bases, 15693555678 kmers +read 164000000 sequences, 20701871322 bases, 15781871322 kmers +read 165000000 sequences, 20820410777 bases, 15870410777 kmers +read 166000000 sequences, 20939977253 bases, 15959977253 kmers +read 167000000 sequences, 21059538950 bases, 16049538950 kmers +read 168000000 sequences, 21179653875 bases, 16139653875 kmers +read 169000000 sequences, 21300125826 bases, 16230125826 kmers +read 170000000 sequences, 21421275644 bases, 16321275644 kmers +read 171000000 sequences, 21542230532 bases, 16412230532 kmers +read 172000000 sequences, 21663705881 bases, 16503705881 kmers +read 173000000 sequences, 21785083912 bases, 16595083912 kmers +read 174000000 sequences, 21907256668 bases, 16687256668 kmers +read 175000000 sequences, 22029743385 bases, 16779743385 kmers +read 176000000 sequences, 22152051369 bases, 16872051369 kmers +read 177000000 sequences, 22275733581 bases, 16965733581 kmers +read 178000000 sequences, 22399701749 bases, 17059701749 kmers +read 179000000 sequences, 22524206009 bases, 17154206009 kmers +read 180000000 sequences, 22648755725 bases, 17248755725 kmers +read 181000000 
sequences, 22773928274 bases, 17343928274 kmers +read 182000000 sequences, 22899987762 bases, 17439987762 kmers +read 183000000 sequences, 23025485159 bases, 17535485159 kmers +read 184000000 sequences, 23152188885 bases, 17632188885 kmers +read 185000000 sequences, 23279392102 bases, 17729392102 kmers +read 186000000 sequences, 23406527093 bases, 17826527093 kmers +read 187000000 sequences, 23534989577 bases, 17924989577 kmers +read 187636048 sequences, 23617016620 bases, 17987935180 kmers +num_kmers 17987935180 +cost: 2.0 + 0.625873 [bits/kmer] +max string length = 38851 +num bits per_absolute_offset = 35 +num bits per_relative_offset = 16 +num bits per_string_id = 28 +=== step 1 (encode strings): 74.4692 [sec] (4.13995 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.68.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.87.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.106.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.125.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.143.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.144.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.162.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.163.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.166.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.181.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.182.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.185.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.200.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.201.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.204.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.206.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.207.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.208.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.209.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.210.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.211.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.212.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.213.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.214.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.215.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.216.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.217.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.218.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.219.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.220.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.221.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.222.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.223.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.224.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.225.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.226.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.227.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.228.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.229.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.230.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.231.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.232.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.233.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.234.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.235.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.236.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.237.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.238.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.239.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.240.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.241.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.242.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.243.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.244.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.245.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.246.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.247.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.248.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.249.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.250.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.251.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.252.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.253.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.254.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.255.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.256.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.257.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.258.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.259.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.260.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.261.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.262.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.263.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.264.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.265.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.266.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.267.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.268.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.269.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.270.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.271.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.272.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.273.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.274.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.275.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.276.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.277.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.278.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.279.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.280.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.281.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.282.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.283.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.284.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.285.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.286.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.287.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.288.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.289.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.290.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.291.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.292.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.293.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.294.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.295.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.296.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.297.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.298.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.299.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.300.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.301.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.302.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.303.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.304.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.305.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.306.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.307.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.308.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.309.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.310.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.311.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.312.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.313.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.314.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.315.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.316.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.317.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.318.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.319.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.320.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.321.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.322.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.323.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.324.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.325.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.326.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.327.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.328.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.329.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.330.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.331.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.332.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.333.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.334.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.335.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.336.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.337.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.338.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.339.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.340.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.341.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.342.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.343.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.344.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.345.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.346.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.347.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.348.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.349.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.350.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.351.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.352.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.353.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.354.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.355.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.356.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.357.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.358.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.359.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.360.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.361.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.362.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.363.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.364.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.365.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.366.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.367.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.368.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.369.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.370.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.371.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.372.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.373.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.374.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.375.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.376.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.377.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.378.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.379.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.380.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.381.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.382.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.383.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.384.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.385.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.386.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.387.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.388.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.389.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.390.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.391.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.392.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.393.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.394.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.395.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.396.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.397.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.398.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.399.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.400.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.401.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.402.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.403.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.404.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.405.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.406.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.407.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.408.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.409.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.410.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.411.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.412.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.413.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.414.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.415.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.416.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.417.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.418.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.419.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.420.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.421.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.422.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.423.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.424.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.425.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.426.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.427.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.428.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.429.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.430.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.431.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.432.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.433.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.434.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.435.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.436.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.437.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.438.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.439.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.440.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.441.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.442.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.443.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.444.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.445.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.446.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.447.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.448.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.449.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.450.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.451.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.452.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.453.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.454.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.455.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.456.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.457.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.458.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.459.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.460.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.461.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.462.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.463.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.464.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.465.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.466.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.467.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.468.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.469.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.470.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.471.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.472.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.473.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.474.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.475.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.476.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.477.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.478.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.479.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.480.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.481.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.482.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.483.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.484.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.485.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.486.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.487.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.488.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.489.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.490.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.491.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.492.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.493.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.494.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.495.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.496.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.497.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.498.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.499.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.500.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.501.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.502.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.503.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.504.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.505.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.506.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.507.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.508.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.509.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.510.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.511.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.512.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.513.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.514.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.515.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.516.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.517.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.518.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.519.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.520.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.521.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.522.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.523.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.524.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.525.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.526.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.527.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.528.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.529.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.530.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.531.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.532.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.533.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.534.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.535.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.536.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.537.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.538.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.539.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.540.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.541.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.542.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.543.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.544.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.545.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.546.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.547.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.548.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.549.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.550.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.551.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.552.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.553.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.554.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.555.bin'... 
+=== step 2 (compute minimizer tuples): 132.929 [sec] (7.38992 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer tuples +processed 2300000000 minimizer tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +processed 2700000000 minimizer tuples +processed 2800000000 minimizer tuples +processed 2900000000 minimizer tuples +processed 3000000000 minimizer tuples +processed 3100000000 minimizer tuples +processed 3200000000 minimizer tuples +processed 3300000000 minimizer tuples +processed 3400000000 minimizer tuples +processed 3500000000 minimizer tuples +processed 3600000000 minimizer tuples +processed 3700000000 minimizer tuples +processed 3800000000 minimizer tuples +processed 3900000000 minimizer tuples +=== step 3 (merging minimizer tuples): 311.499 [sec] (17.3171 [ns/kmer]) +num_minimizers = 3007078089 +num_minimizer_positions = 3771612342 +num_super_kmers = 3913697473 +building minimizers MPHF with 64 threads and 1003 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 271.387 [sec] (15.0872 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.8.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 283.505 [sec] (15.7609 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762815424875733578.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer 
tuples +processed 2300000000 minimizer tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +processed 2700000000 minimizer tuples +processed 2800000000 minimizer tuples +processed 2900000000 minimizer tuples +processed 3000000000 minimizer tuples +processed 3100000000 minimizer tuples +processed 3200000000 minimizer tuples +processed 3300000000 minimizer tuples +processed 3400000000 minimizer tuples +processed 3500000000 minimizer tuples +processed 3600000000 minimizer tuples +processed 3700000000 minimizer tuples +processed 3800000000 minimizer tuples +processed 3900000000 minimizer tuples +=== step 6 (merging minimizers tuples): 409.149 [sec] (22.7457 [ns/kmer]) +num_bits_per_offset = 35 +num_buckets_larger_than_1_not_in_skew_index 259767459/3007078089 (8.63853%) +num_buckets_in_skew_index 621186/3007078089 (0.0206575%) +max_bucket_size 96569 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 942584227/3771612342 (24.9915%) +num_minimizer_positions_of_buckets_in_skew_index 82338671/3771612342 (2.18312%) +=== step 7.1 (build sparse index): 72.5609 [sec] (4.03386 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 170796088 + partition = 1: num kmers in buckets of size > 128 and <= 256: 101178141 + partition = 2: num kmers in buckets of size > 256 and <= 512: 52619176 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 22739211 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 8521738 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2988490 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 1453237 + partition = 7: num kmers in buckets of size > 8192 and <= 96569: 1063107 +num kmers in skew index = 361359188 (2.0089%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 170796088 + building MPHF with 64 threads and 57 partitions (avg. 
partition size = 3000000)... + built mphs[0] for 170796088 kmers; bits/key = 2.55471 + built positions[0] for 170796088 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 101178141 + building MPHF with 64 threads and 34 partitions (avg. partition size = 3000000)... + built mphs[1] for 101178141 kmers; bits/key = 2.55592 + built positions[1] for 101178141 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 52619176 + building MPHF with 64 threads and 18 partitions (avg. partition size = 3000000)... + built mphs[2] for 52619176 kmers; bits/key = 2.52635 + built positions[2] for 52619176 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 22739211 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[3] for 22739211 kmers; bits/key = 2.52777 + built positions[3] for 22739211 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 8521738 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 8521738 kmers; bits/key = 2.52914 + built positions[4] for 8521738 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2988490 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2988490)... + built mphs[5] for 2988490 kmers; bits/key = 2.55981 + built positions[5] for 2988490 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 1453237 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1453237)... + built mphs[6] for 1453237 kmers; bits/key = 2.56054 + built positions[6] for 1453237 kmers; bits/key = 13.0002 + lower = 8192; upper = 96569; num_bits_per_pos = 17; num_kmers_in_partition = 1063107 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1063107)... 
+ built mphs[7] for 1063107 kmers; bits/key = 2.56108 + built positions[7] for 1063107 kmers; bits/key = 17.0003 +=== step 7.2 (build skew index): 56.6652 [sec] (3.15018 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 91.3408% +buckets with 2 minimizer positions = 5.0388% +buckets with 3 minimizer positions = 1.50349% +buckets with 4 minimizer positions = 0.686099% +buckets with 5 minimizer positions = 0.383447% +buckets with 6 minimizer positions = 0.239348% +buckets with 7 minimizer positions = 0.160991% +buckets with 8 minimizer positions = 0.114476% +buckets with 9 minimizer positions = 0.0847425% +buckets with 10 minimizer positions = 0.0647212% +buckets with 11 minimizer positions = 0.0507236% +buckets with 12 minimizer positions = 0.0406309% +buckets with 13 minimizer positions = 0.0330582% +buckets with 14 minimizer positions = 0.0273473% +buckets with 15 minimizer positions = 0.0229519% +buckets with 16 minimizer positions = 0.0194797% +max_bucket_size = 96569 +=== step 7 (build sparse and skew index): 138.007 [sec] (7.67218 [ns/kmer]) +=== total time: 1620.94 [sec] (90.1129 [ns/kmer]) +total index size: 26027317465 [B] -- 26027.3 [MB] +SPACE BREAKDOWN: + mphf: 0.472648 [bits/kmer] (2.82732 [bits/key]) -- 4.0832% + strings_offsets: 0.253608 [bits/kmer] -- 2.19091% + control_codewords: 6.01819 [bits/kmer] -- 51.991% + mid_load_buckets: 1.83403 [bits/kmer] -- 15.8441% + begin_buckets_of_size: 1.19191e-07 [bits/kmer] -- 1.02969e-06% + strings: 2.62587 [bits/kmer] -- 22.6848% + skew_index: 0.371103 [bits/kmer] -- 3.20595% + weights: 8.18326e-08 [bits/kmer] -- 7.0695e-07% + -------------- + total: 11.5755 [bits/kmer] +2025-11-11 00:24:05: saving data structure to disk... 
+2025-11-11 00:26:06: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash --canonical +2025-11-11 00:26:07: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... +read 1000000 sequences, 159860354 bases, 129860354 kmers +read 2000000 sequences, 345519042 bases, 285519042 kmers +read 3000000 sequences, 569210425 bases, 479210425 kmers +read 4000000 sequences, 848332212 bases, 728332212 kmers +read 5000000 sequences, 1226889961 bases, 1076889961 kmers +read 6000000 sequences, 1800462808 bases, 1620462808 kmers +read 7000000 sequences, 1906975392 bases, 1696975392 kmers +read 8000000 sequences, 1964117272 bases, 1724117272 kmers +read 9000000 sequences, 2021346703 bases, 1751346703 kmers +read 10000000 sequences, 2078777105 bases, 1778777105 kmers +read 11000000 sequences, 2136245853 bases, 1806245853 kmers +read 12000000 sequences, 2193864516 bases, 1833864516 kmers +read 13000000 sequences, 2251713140 bases, 1861713140 kmers +read 14000000 sequences, 2309685311 bases, 1889685311 kmers +read 15000000 sequences, 2367830861 bases, 1917830861 kmers +read 16000000 sequences, 2426185107 bases, 1946185107 kmers +read 17000000 sequences, 2484756357 bases, 1974756357 kmers +read 18000000 sequences, 2543560790 bases, 2003560790 kmers +read 19000000 sequences, 2602544828 bases, 2032544828 kmers +read 20000000 sequences, 2661829332 bases, 2061829332 kmers +read 21000000 sequences, 2721408473 bases, 2091408473 kmers +read 22000000 sequences, 2781228842 bases, 2121228842 kmers +read 23000000 sequences, 2841415119 bases, 2151415119 kmers +read 24000000 sequences, 2901936379 bases, 2181936379 kmers +read 25000000 sequences, 2962750749 bases, 2212750749 kmers +read 26000000 sequences, 3023914429 bases, 2243914429 kmers +read 27000000 sequences, 3085556058 bases, 2275556058 kmers +read 
28000000 sequences, 3147523815 bases, 2307523815 kmers +read 29000000 sequences, 3209891758 bases, 2339891758 kmers +read 30000000 sequences, 3272761181 bases, 2372761181 kmers +read 31000000 sequences, 3336150965 bases, 2406150965 kmers +read 32000000 sequences, 3400254734 bases, 2440254734 kmers +read 33000000 sequences, 3464886783 bases, 2474886783 kmers +read 34000000 sequences, 3530247184 bases, 2510247184 kmers +read 35000000 sequences, 3596273843 bases, 2546273843 kmers +read 36000000 sequences, 3663044813 bases, 2583044813 kmers +read 37000000 sequences, 3730743513 bases, 2620743513 kmers +read 38000000 sequences, 3799297920 bases, 2659297920 kmers +read 39000000 sequences, 3869022100 bases, 2699022100 kmers +read 40000000 sequences, 3939899906 bases, 2739899906 kmers +read 41000000 sequences, 4011944353 bases, 2781944353 kmers +read 42000000 sequences, 4085447760 bases, 2825447760 kmers +read 43000000 sequences, 4160667187 bases, 2870667187 kmers +read 44000000 sequences, 4237696486 bases, 2917696486 kmers +read 45000000 sequences, 4316730755 bases, 2966730755 kmers +read 46000000 sequences, 4398064724 bases, 3018064724 kmers +read 47000000 sequences, 4482251464 bases, 3072251464 kmers +read 48000000 sequences, 4569570617 bases, 3129570617 kmers +read 49000000 sequences, 4660631625 bases, 3190631625 kmers +read 50000000 sequences, 4756246344 bases, 3256246344 kmers +read 51000000 sequences, 4856753463 bases, 3326753463 kmers +read 52000000 sequences, 4964398717 bases, 3404398717 kmers +read 53000000 sequences, 5079791551 bases, 3489791551 kmers +read 54000000 sequences, 5205070836 bases, 3585070836 kmers +read 55000000 sequences, 5343495625 bases, 3693495625 kmers +read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +cost: 2.0 + 0.890898 [bits/kmer] +max string length = 17920 +num bits per_absolute_offset = 33 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 16.8603 [sec] (4.53463 
[ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.18.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.37.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.56.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.75.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.94.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.113.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.132.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.133.bin'... +=== step 2 (compute minimizer tuples): 9.07218 [sec] (2.43999 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +=== step 3 (merging minimizer tuples): 60.5144 [sec] (16.2755 [ns/kmer]) +num_minimizers = 619508590 +num_minimizer_positions = 790834640 +num_super_kmers = 819080133 +building minimizers MPHF with 64 threads and 207 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 23.944 [sec] (6.43982 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.1.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 26.5527 [sec] (7.14143 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817167661601220.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +=== step 6 (merging minimizers tuples): 114.26 [sec] (30.7305 [ns/kmer]) +num_bits_per_offset = 33 +num_buckets_larger_than_1_not_in_skew_index 81104016/619508590 (13.0917%) +num_buckets_in_skew_index 149851/619508590 (0.0241887%) +max_bucket_size 71241 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 225181159/790834640 (28.4739%) +num_minimizer_positions_of_buckets_in_skew_index 27398758/790834640 (3.46454%) +=== step 7.1 (build sparse index): 15.751 [sec] (4.23628 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 35558914 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26726484 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19151665 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 12830346 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 8265693 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 5225188 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 3257832 + partition = 7: num kmers in buckets of size > 8192 and <= 71241: 4228970 +num kmers in skew index = 115245092 (3.09955%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 35558914 + building MPHF with 64 threads and 12 partitions (avg. partition size = 3000000)... 
+ built mphs[0] for 35558914 kmers; bits/key = 2.57422 + built positions[0] for 35558914 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26726484 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26726484 kmers; bits/key = 2.5656 + built positions[1] for 26726484 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19151665 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19151665 kmers; bits/key = 2.60993 + built positions[2] for 19151665 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 12830346 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[3] for 12830346 kmers; bits/key = 2.65495 + built positions[3] for 12830346 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 8265693 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[4] for 8265693 kmers; bits/key = 2.59459 + built positions[4] for 8265693 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 5225188 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 5225188 kmers; bits/key = 2.63154 + built positions[5] for 5225188 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 3257832 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 3257832 kmers; bits/key = 3.31153 + built positions[6] for 3257832 kmers; bits/key = 13.0001 + lower = 8192; upper = 71241; num_bits_per_pos = 17; num_kmers_in_partition = 4228970 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 4228970 kmers; bits/key = 2.84935 + built positions[7] for 4228970 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 16.349 [sec] (4.39711 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 86.8841% +buckets with 2 minimizer positions = 10.1608% +buckets with 3 minimizer positions = 1.5937% +buckets with 4 minimizer positions = 0.465965% +buckets with 5 minimizer positions = 0.227622% +buckets with 6 minimizer positions = 0.138122% +buckets with 7 minimizer positions = 0.0932371% +buckets with 8 minimizer positions = 0.0672932% +buckets with 9 minimizer positions = 0.0507686% +buckets with 10 minimizer positions = 0.0394317% +buckets with 11 minimizer positions = 0.0314958% +buckets with 12 minimizer positions = 0.025655% +buckets with 13 minimizer positions = 0.0213997% +buckets with 14 minimizer positions = 0.017952% +buckets with 15 minimizer positions = 0.0152708% +buckets with 16 minimizer positions = 0.0132381% +max_bucket_size = 71241 +=== step 7 (build sparse and skew index): 34.0706 [sec] (9.16338 [ns/kmer]) +=== total time: 285.274 [sec] (76.7252 [ns/kmer]) +total index size: 5544019788 [B] -- 5544.02 [MB] +SPACE BREAKDOWN: + mphf: 0.472233 [bits/kmer] (2.83421 [bits/key]) -- 3.95881% + strings_offsets: 0.300083 [bits/kmer] -- 2.51565% + control_codewords: 5.66504 [bits/kmer] -- 47.491% + mid_load_buckets: 1.99858 [bits/kmer] -- 16.7545% + begin_buckets_of_size: 5.76635e-07 [bits/kmer] -- 4.83404e-06% + strings: 2.8909 [bits/kmer] -- 24.2349% + skew_index: 0.601813 [bits/kmer] -- 5.04511% + weights: 3.95899e-07 [bits/kmer] -- 3.31889e-06% + -------------- + total: 11.9286 [bits/kmer] +2025-11-11 00:30:52: saving data structure to disk... 
+2025-11-11 00:31:18: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash --canonical +2025-11-11 00:31:18: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz'... +read 1000000 sequences, 153195620 bases, 123195620 kmers +read 2000000 sequences, 238301856 bases, 178301856 kmers +read 3000000 sequences, 306827724 bases, 216827724 kmers +read 4000000 sequences, 376235441 bases, 256235441 kmers +read 5000000 sequences, 445233170 bases, 295233170 kmers +read 6000000 sequences, 515368260 bases, 335368260 kmers +read 7000000 sequences, 586116050 bases, 376116050 kmers +read 8000000 sequences, 657174193 bases, 417174193 kmers +read 9000000 sequences, 729536721 bases, 459536721 kmers +read 10000000 sequences, 802902838 bases, 502902838 kmers +read 11000000 sequences, 876372447 bases, 546372447 kmers +read 12000000 sequences, 951284053 bases, 591284053 kmers +read 13000000 sequences, 1027636701 bases, 637636701 kmers +read 14000000 sequences, 1105722693 bases, 685722693 kmers +read 15000000 sequences, 1185796892 bases, 735796892 kmers +read 16000000 sequences, 1267846293 bases, 787846293 kmers +read 17000000 sequences, 1352901026 bases, 842901026 kmers +read 18000000 sequences, 1442015880 bases, 902015880 kmers +read 19000000 sequences, 1536308350 bases, 966308350 kmers +read 20000000 sequences, 1639158516 bases, 1039158516 kmers +read 20822360 sequences, 1735689645 bases, 1111018845 kmers +num_kmers 1111018845 +cost: 2.0 + 1.1245 [bits/kmer] +max string length = 176455 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 25 +=== step 1 (encode strings): 5.7441 [sec] (5.17012 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.0.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.19.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.38.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.57.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.65.bin'... +=== step 2 (compute minimizer tuples): 1.56241 [sec] (1.40628 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 13.4328 [sec] (12.0905 [ns/kmer]) +num_minimizers = 174641353 +num_minimizer_positions = 240693690 +num_super_kmers = 249228267 +building minimizers MPHF with 64 threads and 59 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 7.28747 [sec] (6.55927 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817478694665857.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 8.40751 [sec] (7.56739 [ns/kmer]) +=== step 6 (merging minimizers tuples): 32.332 [sec] (29.1012 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 20680302/174641353 (11.8416%) +num_buckets_in_skew_index 18464/174641353 (0.0105725%) +max_bucket_size 37043 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 82592303/240693690 (34.3143%) +num_minimizer_positions_of_buckets_in_skew_index 4158800/240693690 (1.72784%) +=== step 7.1 (build sparse index): 4.99956 [sec] (4.49997 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4817539 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2376719 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1827153 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1520019 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1265567 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 951364 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 1017584 + partition = 7: num kmers in buckets of size > 8192 and <= 37043: 1228646 +num kmers in skew index = 15004591 (1.35053%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4817539 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4817539 kmers; bits/key = 2.73 + built positions[0] for 4817539 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2376719 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2376719)... + built mphs[1] for 2376719 kmers; bits/key = 2.55999 + built positions[1] for 2376719 kmers; bits/key = 8.00014 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1827153 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1827153)... 
+ built mphs[2] for 1827153 kmers; bits/key = 2.56021 + built positions[2] for 1827153 kmers; bits/key = 9.0002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1520019 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1520019)... + built mphs[3] for 1520019 kmers; bits/key = 2.56043 + built positions[3] for 1520019 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1265567 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1265567)... + built mphs[4] for 1265567 kmers; bits/key = 2.56074 + built positions[4] for 1265567 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 951364 + building MPHF with 64 threads and 1 partitions (avg. partition size = 951364)... + built mphs[5] for 951364 kmers; bits/key = 2.56127 + built positions[5] for 951364 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 1017584 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1017584)... + built mphs[6] for 1017584 kmers; bits/key = 2.41831 + built positions[6] for 1017584 kmers; bits/key = 13.0003 + lower = 8192; upper = 37043; num_bits_per_pos = 16; num_kmers_in_partition = 1228646 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1228646)... 
+ built mphs[7] for 1228646 kmers; bits/key = 2.56083 + built positions[7] for 1228646 kmers; bits/key = 16.0003 +=== step 7.2 (build skew index): 6.45738 [sec] (5.81212 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.1478% +buckets with 2 minimizer positions = 6.79213% +buckets with 3 minimizer positions = 1.92226% +buckets with 4 minimizer positions = 0.856078% +buckets with 5 minimizer positions = 0.493468% +buckets with 6 minimizer positions = 0.325915% +buckets with 7 minimizer positions = 0.234531% +buckets with 8 minimizer positions = 0.178296% +buckets with 9 minimizer positions = 0.141164% +buckets with 10 minimizer positions = 0.115104% +buckets with 11 minimizer positions = 0.0959486% +buckets with 12 minimizer positions = 0.0807615% +buckets with 13 minimizer positions = 0.0695379% +buckets with 14 minimizer positions = 0.060729% +buckets with 15 minimizer positions = 0.0531947% +buckets with 16 minimizer positions = 0.0472706% +max_bucket_size = 37043 +=== step 7 (build sparse and skew index): 12.0976 [sec] (10.8887 [ns/kmer]) +=== total time: 80.8638 [sec] (72.7835 [ns/kmer]) +total index size: 1600485113 [B] -- 1600.49 [MB] +SPACE BREAKDOWN: + mphf: 0.447872 [bits/kmer] (2.84923 [bits/key]) -- 3.88627% + strings_offsets: 0.337884 [bits/kmer] -- 2.93189% + control_codewords: 5.03009 [bits/kmer] -- 43.6471% + mid_load_buckets: 2.30452 [bits/kmer] -- 19.9968% + begin_buckets_of_size: 1.92976e-06 [bits/kmer] -- 1.67449e-05% + strings: 3.1245 [bits/kmer] -- 27.1119% + skew_index: 0.279583 [bits/kmer] -- 2.426% + weights: 1.32491e-06 [bits/kmer] -- 1.14965e-05% + -------------- + total: 11.5244 [bits/kmer] +2025-11-11 00:32:39: saving data structure to disk... 
+2025-11-11 00:32:46: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash --canonical +2025-11-11 00:32:47: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 4.57831 [sec] (5.11938 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.2.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.21.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.40.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.59.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.64.bin'... +=== step 2 (compute minimizer tuples): 1.35994 [sec] (1.52066 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.6837 [sec] (11.9463 [ns/kmer]) +num_minimizers = 143418843 +num_minimizer_positions = 193511241 +num_super_kmers = 200364919 +building minimizers MPHF with 64 threads and 48 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.98867 [sec] (6.69642 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817567105836865.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 6.75619 [sec] (7.55464 [ns/kmer]) +=== step 6 (merging minimizers tuples): 29.022 [sec] (32.4518 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 18123968/143418843 (12.6371%) +num_buckets_in_skew_index 15172/143418843 (0.0105788%) +max_bucket_size 30655 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 65664193/193511241 (33.933%) +num_minimizer_positions_of_buckets_in_skew_index 2567345/193511241 (1.32672%) +=== step 7.1 (build sparse index): 4.07138 [sec] (4.55254 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4240400 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1914946 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1106165 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 771672 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 562721 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 475654 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 388509 + partition = 7: num kmers in buckets of size > 8192 and <= 30655: 459571 +num kmers in skew index = 9919638 (1.10919%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4240400 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4240400 kmers; bits/key = 2.84282 + built positions[0] for 4240400 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1914946 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1914946)... + built mphs[1] for 1914946 kmers; bits/key = 2.56017 + built positions[1] for 1914946 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1106165 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1106165)... 
+ built mphs[2] for 1106165 kmers; bits/key = 2.56093 + built positions[2] for 1106165 kmers; bits/key = 9.00032 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 771672 + building MPHF with 64 threads and 1 partitions (avg. partition size = 771672)... + built mphs[3] for 771672 kmers; bits/key = 2.41893 + built positions[3] for 771672 kmers; bits/key = 10.0004 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 562721 + building MPHF with 64 threads and 1 partitions (avg. partition size = 562721)... + built mphs[4] for 562721 kmers; bits/key = 2.56295 + built positions[4] for 562721 kmers; bits/key = 11.0006 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 475654 + building MPHF with 64 threads and 1 partitions (avg. partition size = 475654)... + built mphs[5] for 475654 kmers; bits/key = 2.56365 + built positions[5] for 475654 kmers; bits/key = 12.0008 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 388509 + building MPHF with 64 threads and 1 partitions (avg. partition size = 388509)... + built mphs[6] for 388509 kmers; bits/key = 2.42185 + built positions[6] for 388509 kmers; bits/key = 13.0008 + lower = 8192; upper = 30655; num_bits_per_pos = 15; num_kmers_in_partition = 459571 + building MPHF with 64 threads and 1 partitions (avg. partition size = 459571)... 
+ built mphs[7] for 459571 kmers; bits/key = 2.4208 + built positions[7] for 459571 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 3.78132 [sec] (4.22819 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 87.3523% +buckets with 2 minimizer positions = 7.4877% +buckets with 3 minimizer positions = 2.14684% +buckets with 4 minimizer positions = 0.927917% +buckets with 5 minimizer positions = 0.508829% +buckets with 6 minimizer positions = 0.320607% +buckets with 7 minimizer positions = 0.221914% +buckets with 8 minimizer positions = 0.16449% +buckets with 9 minimizer positions = 0.128547% +buckets with 10 minimizer positions = 0.104586% +buckets with 11 minimizer positions = 0.0865967% +buckets with 12 minimizer positions = 0.0732902% +buckets with 13 minimizer positions = 0.062991% +buckets with 14 minimizer positions = 0.0543541% +buckets with 15 minimizer positions = 0.0475495% +buckets with 16 minimizer positions = 0.0409653% +max_bucket_size = 30655 +=== step 7 (build sparse and skew index): 8.38333 [sec] (9.37408 [ns/kmer]) +=== total time: 66.7721 [sec] (74.6632 [ns/kmer]) +total index size: 1287008500 [B] -- 1287.01 [MB] +SPACE BREAKDOWN: + mphf: 0.453288 [bits/kmer] (2.82655 [bits/key]) -- 3.93723% + strings_offsets: 0.333373 [bits/kmer] -- 2.89565% + control_codewords: 5.13178 [bits/kmer] -- 44.5743% + mid_load_buckets: 2.27616 [bits/kmer] -- 19.7706% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.08235e-05% + strings: 3.10303 [bits/kmer] -- 26.9527% + skew_index: 0.215227 [bits/kmer] -- 1.86944% + weights: 1.64596e-06 [bits/kmer] -- 1.42967e-05% + -------------- + total: 11.5129 [bits/kmer] +2025-11-11 00:33:53: saving data structure to disk... 
+2025-11-11 00:33:59: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash --canonical +2025-11-11 00:34:00: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +read 1000000 sequences, 289026795 bases, 259026795 kmers +read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +cost: 2.0 + 0.262431 [bits/kmer] +max string length = 234900 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 18 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.11357 [sec] (2.96002 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.620769 [sec] (1.65008 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.bin' +=== step 3 (merging minimizer tuples): 3.51126 [sec] (9.33337 [ns/kmer]) +num_minimizers = 61900353 +num_minimizer_positions = 66937395 +num_super_kmers = 69800288 +building minimizers MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 2.76113 [sec] (7.33943 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817640087399618.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.49504 [sec] (6.63212 [ns/kmer]) +=== step 6 (merging minimizers tuples): 5.20608 [sec] (13.8384 [ns/kmer]) +num_bits_per_offset = 29 +num_buckets_larger_than_1_not_in_skew_index 3624138/61900353 (5.85479%) +num_buckets_in_skew_index 39/61900353 (6.30045e-05%) +max_bucket_size 536 +log2_max_bucket_size 10 +num_partitions in skew index 4 +num_minimizer_positions_of_buckets_larger_than_1 8656439/66937395 (12.9321%) +num_minimizer_positions_of_buckets_in_skew_index 4780/66937395 (0.007141%) +=== step 7.1 (build sparse index): 0.960968 [sec] (2.55437 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 13913 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2852 + partition = 2: num kmers in buckets of size > 256 and <= 512: 3765 + partition = 3: num kmers in buckets of size > 512 and <= 536: 1800 +num kmers in skew index = 22330 (0.00593559%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 13913 + building MPHF with 64 threads and 1 partitions (avg. partition size = 13913)... + built mphs[0] for 13913 kmers; bits/key = 2.42421 + built positions[0] for 13913 kmers; bits/key = 7.02422 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2852 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2852)... + built mphs[1] for 2852 kmers; bits/key = 3.01823 + built positions[1] for 2852 kmers; bits/key = 8.12342 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 3765 + building MPHF with 64 threads and 1 partitions (avg. partition size = 3765)... 
+ built mphs[2] for 3765 kmers; bits/key = 2.85578 + built positions[2] for 3765 kmers; bits/key = 9.09429 + lower = 512; upper = 536; num_bits_per_pos = 10; num_kmers_in_partition = 1800 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1800)... + built mphs[3] for 1800 kmers; bits/key = 3.48444 + built positions[3] for 1800 kmers; bits/key = 10.2044 +=== step 7.2 (build skew index): 0.039044 [sec] (0.103784 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 94.1451% +buckets with 2 minimizer positions = 4.50452% +buckets with 3 minimizer positions = 0.894247% +buckets with 4 minimizer positions = 0.259036% +buckets with 5 minimizer positions = 0.0977103% +buckets with 6 minimizer positions = 0.0440321% +buckets with 7 minimizer positions = 0.0222034% +buckets with 8 minimizer positions = 0.0119789% +buckets with 9 minimizer positions = 0.00695473% +buckets with 10 minimizer positions = 0.00425684% +buckets with 11 minimizer positions = 0.00288528% +buckets with 12 minimizer positions = 0.00173505% +buckets with 13 minimizer positions = 0.00116639% +buckets with 14 minimizer positions = 0.000827136% +buckets with 15 minimizer positions = 0.000639738% +buckets with 16 minimizer positions = 0.000463648% +max_bucket_size = 536 +=== step 7 (build sparse and skew index): 1.16437 [sec] (3.09504 [ns/kmer]) +=== total time: 16.8722 [sec] (44.8485 [ns/kmer]) +total index size: 398891769 [B] -- 398.892 [MB] +SPACE BREAKDOWN: + mphf: 0.472086 [bits/kmer] (2.86915 [bits/key]) -- 5.56546% + strings_offsets: 0.14346 [bits/kmer] -- 1.69126% + control_codewords: 4.93617 [bits/kmer] -- 58.1928% + mid_load_buckets: 0.667288 [bits/kmer] -- 7.8667% + begin_buckets_of_size: 5.69902e-06 [bits/kmer] -- 6.71861e-05% + strings: 2.26243 [bits/kmer] -- 26.672% + skew_index: 0.000988652 [bits/kmer] -- 0.0116553% + weights: 3.91276e-06 [bits/kmer] -- 4.61278e-05% + -------------- + total: 8.48243 [bits/kmer] +2025-11-11 00:34:16: saving 
data structure to disk... +2025-11-11 00:34:18: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash --canonical +2025-11-11 00:34:18: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz'... +read 1000000 sequences, 686017631 bases, 656017631 kmers +read 2000000 sequences, 1516930736 bases, 1456930736 kmers +read 3000000 sequences, 1671624364 bases, 1581624364 kmers +read 4000000 sequences, 1825353707 bases, 1705353707 kmers +read 5000000 sequences, 1981022079 bases, 1831022079 kmers +read 6000000 sequences, 2135517340 bases, 1955517340 kmers +read 7000000 sequences, 2290855053 bases, 2080855053 kmers +read 8000000 sequences, 2446475973 bases, 2206475973 kmers +read 9000000 sequences, 2602386528 bases, 2332386528 kmers +read 10000000 sequences, 2759608735 bases, 2459608735 kmers +read 11000000 sequences, 2916635557 bases, 2586635557 kmers +read 12000000 sequences, 3075074548 bases, 2715074548 kmers +read 13000000 sequences, 3231174451 bases, 2841174451 kmers +read 14000000 sequences, 3387702724 bases, 2967702724 kmers +read 15000000 sequences, 3546041675 bases, 3096041675 kmers +read 16000000 sequences, 3705485347 bases, 3225485347 kmers +read 17000000 sequences, 3864338567 bases, 3354338567 kmers +read 18000000 sequences, 4024214257 bases, 3484214257 kmers +read 19000000 sequences, 4185860468 bases, 3615860468 kmers +read 20000000 sequences, 4346092632 bases, 3746092632 kmers +read 21000000 sequences, 4506820088 bases, 3876820088 kmers +read 22000000 sequences, 4669576501 bases, 4009576501 kmers +read 23000000 sequences, 4833065439 bases, 4143065439 kmers +read 24000000 sequences, 4995788985 bases, 4275788985 kmers +read 25000000 sequences, 5161157096 bases, 4411157096 kmers +read 26000000 sequences, 5326624598 bases, 4546624598 
kmers +read 27000000 sequences, 5491635755 bases, 4681635755 kmers +read 28000000 sequences, 5657793355 bases, 4817793355 kmers +read 29000000 sequences, 5825695255 bases, 4955695255 kmers +read 30000000 sequences, 5993510862 bases, 5093510862 kmers +read 31000000 sequences, 6162441208 bases, 5232441208 kmers +read 32000000 sequences, 6331692362 bases, 5371692362 kmers +read 33000000 sequences, 6503271864 bases, 5513271864 kmers +read 34000000 sequences, 6675760229 bases, 5655760229 kmers +read 35000000 sequences, 6848330384 bases, 5798330384 kmers +read 36000000 sequences, 7023123965 bases, 5943123965 kmers +read 37000000 sequences, 7198253074 bases, 6088253074 kmers +read 38000000 sequences, 7375809245 bases, 6235809245 kmers +read 39000000 sequences, 7554546146 bases, 6384546146 kmers +read 40000000 sequences, 7733588270 bases, 6533588270 kmers +read 41000000 sequences, 7913812723 bases, 6683812723 kmers +read 42000000 sequences, 8096249793 bases, 6836249793 kmers +read 43000000 sequences, 8280221420 bases, 6990221420 kmers +read 44000000 sequences, 8465351199 bases, 7145351199 kmers +read 45000000 sequences, 8653130199 bases, 7303130199 kmers +read 46000000 sequences, 8842916979 bases, 7462916979 kmers +read 47000000 sequences, 9034171590 bases, 7624171590 kmers +read 48000000 sequences, 9229077420 bases, 7789077420 kmers +read 49000000 sequences, 9427173385 bases, 7957173385 kmers +read 50000000 sequences, 9626599822 bases, 8126599822 kmers +read 51000000 sequences, 9828281066 bases, 8298281066 kmers +read 52000000 sequences, 10034632099 bases, 8474632099 kmers +read 53000000 sequences, 10244441062 bases, 8654441062 kmers +read 54000000 sequences, 10461638729 bases, 8841638729 kmers +read 55000000 sequences, 10681775593 bases, 9031775593 kmers +read 56000000 sequences, 10909062511 bases, 9229062511 kmers +read 57000000 sequences, 11141765143 bases, 9431765143 kmers +read 58000000 sequences, 11382466536 bases, 9642466536 kmers +read 59000000 sequences, 
11631643814 bases, 9861643814 kmers +read 60000000 sequences, 11893103257 bases, 10093103257 kmers +read 61000000 sequences, 12168237378 bases, 10338237378 kmers +read 62000000 sequences, 12459768025 bases, 10599768025 kmers +read 63000000 sequences, 12773380141 bases, 10883380141 kmers +read 64000000 sequences, 13118311195 bases, 11198311195 kmers +read 65000000 sequences, 13506575783 bases, 11556575783 kmers +read 66000000 sequences, 13957265370 bases, 11977265370 kmers +read 66677672 sequences, 14320170624 bases, 12319840464 kmers +num_kmers 12319840464 +cost: 2.0 + 0.324733 [bits/kmer] +max string length = 199388 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 18 +num bits per_string_id = 26 +=== step 1 (encode strings): 38.0187 [sec] (3.08597 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.10.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.29.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.48.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.67.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.86.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.105.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.124.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.143.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.144.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.162.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.163.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.166.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.181.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.182.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.185.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.200.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.201.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.204.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.206.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.207.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.208.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.209.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.210.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.211.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.212.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.213.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.214.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.215.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.216.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.217.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.218.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.219.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.220.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.221.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.222.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.223.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.224.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.225.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.226.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.227.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.228.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.229.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.230.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.231.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.232.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.233.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.234.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.235.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.236.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.237.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.238.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.239.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.240.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.241.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.242.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.243.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.244.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.245.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.246.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.247.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.248.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.249.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.250.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.251.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.252.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.253.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.254.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.255.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.256.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.257.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.258.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.259.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.260.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.261.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.262.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.263.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.264.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.265.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.266.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.267.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.268.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.269.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.270.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.271.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.272.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.273.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.274.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.275.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.276.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.277.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.278.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.279.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.280.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.281.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.282.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.283.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.284.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.285.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.286.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.287.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.288.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.289.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.290.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.291.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.292.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.293.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.294.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.295.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.296.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.297.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.298.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.299.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.300.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.301.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.302.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.303.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.304.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.305.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.306.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.307.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.308.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.309.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.310.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.311.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.312.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.313.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.314.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.315.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.316.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.317.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.318.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.319.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.320.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.321.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.322.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.323.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.324.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.325.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.326.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.327.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.328.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.329.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.330.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.331.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.332.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.333.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.334.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.335.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.336.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.337.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.338.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.339.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.340.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.341.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.342.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.343.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.344.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.345.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.346.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.347.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.348.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.349.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.350.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.351.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.352.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.353.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.354.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.355.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.356.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.357.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.358.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.359.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.360.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.361.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.362.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.363.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.364.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.365.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.366.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.367.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.368.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.369.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.370.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.371.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.372.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.373.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.374.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.375.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.376.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.377.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.378.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.379.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.380.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.381.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.382.bin'... 
+=== step 2 (compute minimizer tuples): 40.8813 [sec] (3.31833 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer tuples +processed 2300000000 minimizer tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +=== step 3 (merging minimizer tuples): 208.951 [sec] (16.9606 [ns/kmer]) +num_minimizers = 2310123641 +num_minimizer_positions = 2534773036 +num_super_kmers = 2635842181 +building minimizers MPHF with 64 threads and 771 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 214.323 [sec] (17.3966 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.3.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.5.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 186.685 [sec] (15.1532 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762817658872962917.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer tuples +processed 2300000000 minimizer tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +=== step 6 (merging minimizers tuples): 273.627 [sec] (22.2103 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 139340525/2310123641 (6.03173%) +num_buckets_in_skew_index 26397/2310123641 (0.00114267%) +max_bucket_size 39709 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 359624077/2534773036 (14.1876%) +num_minimizer_positions_of_buckets_in_skew_index 4392240/2534773036 (0.173279%) +=== step 7.1 (build sparse index): 38.3962 [sec] (3.11661 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and 
<= 128: 8192991 + partition = 1: num kmers in buckets of size > 128 and <= 256: 4344352 + partition = 2: num kmers in buckets of size > 256 and <= 512: 3071572 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1936665 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1598600 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1118432 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 506671 + partition = 7: num kmers in buckets of size > 8192 and <= 39709: 918303 +num kmers in skew index = 21687586 (0.176038%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 8192991 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[0] for 8192991 kmers; bits/key = 2.61394 + built positions[0] for 8192991 kmers; bits/key = 7.00004 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 4344352 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 4344352 kmers; bits/key = 2.78474 + built positions[1] for 4344352 kmers; bits/key = 8.00007 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 3071572 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 3071572 kmers; bits/key = 3.20805 + built positions[2] for 3071572 kmers; bits/key = 9.00011 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1936665 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1936665)... + built mphs[3] for 1936665 kmers; bits/key = 2.56017 + built positions[3] for 1936665 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1598600 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1598600)... 
+ built mphs[4] for 1598600 kmers; bits/key = 2.56041 + built positions[4] for 1598600 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1118432 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1118432)... + built mphs[5] for 1118432 kmers; bits/key = 2.56094 + built positions[5] for 1118432 kmers; bits/key = 12.0003 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 506671 + building MPHF with 64 threads and 1 partitions (avg. partition size = 506671)... + built mphs[6] for 506671 kmers; bits/key = 2.42028 + built positions[6] for 506671 kmers; bits/key = 13.0007 + lower = 8192; upper = 39709; num_bits_per_pos = 16; num_kmers_in_partition = 918303 + building MPHF with 64 threads and 1 partitions (avg. partition size = 918303)... + built mphs[7] for 918303 kmers; bits/key = 2.56139 + built positions[7] for 918303 kmers; bits/key = 16.0004 +=== step 7.2 (build skew index): 5.83207 [sec] (0.473389 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 93.9671% +buckets with 2 minimizer positions = 4.43011% +buckets with 3 minimizer positions = 0.958706% +buckets with 4 minimizer positions = 0.312903% +buckets with 5 minimizer positions = 0.13175% +buckets with 6 minimizer positions = 0.0653104% +buckets with 7 minimizer positions = 0.0366997% +buckets with 8 minimizer positions = 0.0229438% +buckets with 9 minimizer positions = 0.01544% +buckets with 10 minimizer positions = 0.0109679% +buckets with 11 minimizer positions = 0.00812428% +buckets with 12 minimizer positions = 0.00620521% +buckets with 13 minimizer positions = 0.00487394% +buckets with 14 minimizer positions = 0.00390131% +buckets with 15 minimizer positions = 0.00317338% +buckets with 16 minimizer positions = 0.00260103% +max_bucket_size = 39709 +=== step 7 (build sparse and skew index): 49.8393 [sec] (4.04545 [ns/kmer]) +=== total time: 1012.33 [sec] (82.1704 [ns/kmer]) 
+total index size: 16357630621 [B] -- 16357.6 [MB] +SPACE BREAKDOWN: + mphf: 0.5305 [bits/kmer] (2.82915 [bits/key]) -- 4.99436% + strings_offsets: 0.178888 [bits/kmer] -- 1.68413% + control_codewords: 6.56294 [bits/kmer] -- 61.7864% + mid_load_buckets: 0.992482 [bits/kmer] -- 9.34367% + begin_buckets_of_size: 1.74028e-07 [bits/kmer] -- 1.63838e-06% + strings: 2.32473 [bits/kmer] -- 21.8861% + skew_index: 0.0324357 [bits/kmer] -- 0.305364% + weights: 1.19482e-07 [bits/kmer] -- 1.12486e-06% + -------------- + total: 10.622 [bits/kmer] +2025-11-11 00:51:11: saving data structure to disk... +2025-11-11 00:52:28: DONE diff --git a/benchmarks/results-10-11-25/k31/canon-build.time.log b/benchmarks/results-10-11-25/k31/canon-build.time.log new file mode 100644 index 0000000..c05ec83 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-build.time.log @@ -0,0 +1,207 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash --canonical" + User time (seconds): 55.86 + System time (seconds): 13.46 + Percent of CPU this job got: 230% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:30.03 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4893484 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 3272542 + Voluntary context switches: 30178 + Involuntary context switches: 8313 + Swaps: 0 + File system inputs: 121616 + File system outputs: 14514904 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d 
/mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash --canonical" + User time (seconds): 138.73 + System time (seconds): 28.72 + Percent of CPU this job got: 229% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:12.84 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9627436 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7323840 + Voluntary context switches: 38810 + Involuntary context switches: 8515 + Swaps: 0 + File system inputs: 667928 + File system outputs: 33377312 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash --canonical" + User time (seconds): 402.01 + System time (seconds): 77.72 + Percent of CPU this job got: 234% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:24.55 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 20293184 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 16812014 + Voluntary context switches: 105629 + Involuntary context switches: 14100 + Swaps: 0 + File system inputs: 1725608 + File system outputs: 97182920 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o 
/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.canon.sshash --canonical" + User time (seconds): 3096.75 + System time (seconds): 465.92 + Percent of CPU this job got: 204% + Elapsed (wall clock) time (h:mm:ss or m:ss): 29:02.78 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 117399064 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 83853618 + Voluntary context switches: 949118 + Involuntary context switches: 85961 + Swaps: 0 + File system inputs: 16123848 + File system outputs: 707620200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash --canonical" + User time (seconds): 594.65 + System time (seconds): 102.64 + Percent of CPU this job got: 224% + Elapsed (wall clock) time (h:mm:ss or m:ss): 5:11.03 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 26835704 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 23863157 + Voluntary context switches: 140738 + Involuntary context switches: 17267 + Swaps: 0 + File system inputs: 328 + File system outputs: 147931672 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o 
/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.canon.sshash --canonical" + User time (seconds): 153.25 + System time (seconds): 33.92 + Percent of CPU this job got: 211% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:28.40 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 11295764 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8234867 + Voluntary context switches: 31507 + Involuntary context switches: 9385 + Swaps: 0 + File system inputs: 96 + File system outputs: 35597416 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash --canonical" + User time (seconds): 118.00 + System time (seconds): 26.39 + Percent of CPU this job got: 197% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:12.97 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9106788 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6678995 + Voluntary context switches: 33139 + Involuntary context switches: 8840 + Swaps: 0 + File system inputs: 80 + File system outputs: 28721576 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o 
/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash --canonical" + User time (seconds): 34.94 + System time (seconds): 9.59 + Percent of CPU this job got: 237% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.78 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3577468 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2329236 + Voluntary context switches: 29575 + Involuntary context switches: 7356 + Swaps: 0 + File system inputs: 56 + File system outputs: 10334360 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.canon.sshash --canonical" + User time (seconds): 1985.87 + System time (seconds): 329.22 + Percent of CPU this job got: 212% + Elapsed (wall clock) time (h:mm:ss or m:ss): 18:10.53 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 72774192 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 58475937 + Voluntary context switches: 742078 + Involuntary context switches: 54485 + Swaps: 0 + File system inputs: 744 + File system outputs: 484368272 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.json b/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.json new file mode 100644 index 
0000000..5ba1b3d --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "163287360", "num_positive_kmers": "132860997", "num_negative_kmers": "30426363", "num_invalid_kmers": "0", "num_searches": "6576340", "num_extensions": "126284657", "elapsed_millisec": "4862"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "695737535", "num_positive_kmers": "525542891", "num_negative_kmers": "170183654", "num_invalid_kmers": "10990", "num_searches": "12437476", "num_extensions": "513105415", "elapsed_millisec": "34000"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1437949378", "num_negative_kmers": "130996597", "num_invalid_kmers": "1029011", "num_searches": "100222623", "num_extensions": "1337726755", "elapsed_millisec": "134221"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz", "num_kmers": "14092875", "num_positive_kmers": "13983775", "num_negative_kmers": "108161", "num_invalid_kmers": "939", "num_searches": "590894", "num_extensions": "13392881", "elapsed_millisec": "405"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz", "num_kmers": "789838196", "num_positive_kmers": "764882549", "num_negative_kmers": "24935381", "num_invalid_kmers": "20266", "num_searches": "218875709", "num_extensions": "546006840", "elapsed_millisec": "182160"} +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1485223278", "num_negative_kmers": "83722697", "num_invalid_kmers": "1029011", "num_searches": "135823240", "num_extensions": "1349400038", "elapsed_millisec": "170163"} diff --git a/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.log b/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.log new file mode 100644 index 0000000..a6cde44 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/canon-streaming-queries-high-hit.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2025-11-11 21:00:50: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-11 21:00:54: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 132860997 (81.3664%) +num_negative_kmers = 30426363 (18.6336%) +num_invalid_kmers = 0 (0%) +num_searches = 6576340/132860997 (4.94979%) +num_extensions = 126284657/132860997 (95.0502%) +elapsed = 4.862 sec / 0.0810333 min / 29.7757 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2025-11-11 21:00:55: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... 
+2025-11-11 21:01:29: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 525542891 (75.5375%) +num_negative_kmers = 170183654 (24.4609%) +num_invalid_kmers = 10990 (0.00157962%) +num_searches = 12437476/525542891 (2.3666%) +num_extensions = 513105415/525542891 (97.6334%) +elapsed = 34 sec / 0.566667 min / 48.869 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:01:30: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-11 21:03:45: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1437949378 (91.5906%) +num_negative_kmers = 130996597 (8.34387%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 100222623/1437949378 (6.96983%) +num_extensions = 1337726755/1437949378 (93.0302%) +elapsed = 134.221 sec / 2.23702 min / 85.4924 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz +2025-11-11 21:03:45: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... +2025-11-11 21:03:45: DONE +==== query report: +num_kmers = 14092875 +num_positive_kmers = 13983775 (99.2258%) +num_negative_kmers = 108161 (0.767487%) +num_invalid_kmers = 939 (0.00666294%) +num_searches = 590894/13983775 (4.22557%) +num_extensions = 13392881/13983775 (95.7744%) +elapsed = 0.405 sec / 0.00675 min / 28.7379 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz +2025-11-11 21:03:46: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... 
+2025-11-11 21:06:48: DONE +==== query report: +num_kmers = 789838196 +num_positive_kmers = 764882549 (96.8404%) +num_negative_kmers = 24935381 (3.15702%) +num_invalid_kmers = 20266 (0.00256584%) +num_searches = 218875709/764882549 (28.6156%) +num_extensions = 546006840/764882549 (71.3844%) +elapsed = 182.16 sec / 3.036 min / 230.63 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:06:50: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-11 21:09:40: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1485223278 (94.6017%) +num_negative_kmers = 83722697 (5.33274%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 135823240/1485223278 (9.14497%) +num_extensions = 1349400038/1485223278 (90.855%) +elapsed = 170.163 sec / 2.83605 min / 108.386 ns/kmer diff --git a/benchmarks/results-10-11-25/k31/regular-bench.json b/benchmarks/results-10-11-25/k31/regular-bench.json new file mode 100644 index 0000000..3377f8b --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-bench.json @@ -0,0 +1,27 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "593.468372", "negative lookup (avg_nanosec_per_kmer)": "669.693262", "access (avg_nanosec_per_kmer)": "276.835679", "iterator (avg_nanosec_per_kmer)": "2.538931"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "595.468905", "negative lookup (avg_nanosec_per_kmer)": "674.647963", "access (avg_nanosec_per_kmer)": "277.032749", "iterator (avg_nanosec_per_kmer)": "2.567583"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "595.824805", 
"negative lookup (avg_nanosec_per_kmer)": "675.654607", "access (avg_nanosec_per_kmer)": "275.144576", "iterator (avg_nanosec_per_kmer)": "2.547010"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "605.924314", "negative lookup (avg_nanosec_per_kmer)": "743.327657", "access (avg_nanosec_per_kmer)": "278.247136", "iterator (avg_nanosec_per_kmer)": "2.608827"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "607.134942", "negative lookup (avg_nanosec_per_kmer)": "735.006204", "access (avg_nanosec_per_kmer)": "277.400333", "iterator (avg_nanosec_per_kmer)": "2.515158"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "k": "31", "m": "20", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "606.580916", "negative lookup (avg_nanosec_per_kmer)": "739.803977", "access (avg_nanosec_per_kmer)": "279.166931", "iterator (avg_nanosec_per_kmer)": "2.571017"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "813.941644", "negative lookup (avg_nanosec_per_kmer)": "801.526599", "access (avg_nanosec_per_kmer)": "351.348360", "iterator (avg_nanosec_per_kmer)": "2.545131"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "827.115043", "negative lookup (avg_nanosec_per_kmer)": "798.392897", "access (avg_nanosec_per_kmer)": "379.988941", "iterator (avg_nanosec_per_kmer)": "2.509446"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "815.452368", "negative lookup (avg_nanosec_per_kmer)": 
"796.054816", "access (avg_nanosec_per_kmer)": "349.788425", "iterator (avg_nanosec_per_kmer)": "2.525423"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1279.524666", "negative lookup (avg_nanosec_per_kmer)": "1140.356605", "access (avg_nanosec_per_kmer)": "911.249444", "iterator (avg_nanosec_per_kmer)": "2.617473"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1280.785104", "negative lookup (avg_nanosec_per_kmer)": "1130.026929", "access (avg_nanosec_per_kmer)": "882.075007", "iterator (avg_nanosec_per_kmer)": "2.520366"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1281.807215", "negative lookup (avg_nanosec_per_kmer)": "1135.152370", "access (avg_nanosec_per_kmer)": "884.420808", "iterator (avg_nanosec_per_kmer)": "2.547944"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1054.962798", "negative lookup (avg_nanosec_per_kmer)": "849.982934", "access (avg_nanosec_per_kmer)": "546.170974", "iterator (avg_nanosec_per_kmer)": "2.529945"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1053.023746", "negative lookup (avg_nanosec_per_kmer)": "854.227562", "access (avg_nanosec_per_kmer)": "538.476049", "iterator (avg_nanosec_per_kmer)": "2.497507"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1059.839346", "negative lookup (avg_nanosec_per_kmer)": "865.469048", "access 
(avg_nanosec_per_kmer)": "546.279808", "iterator (avg_nanosec_per_kmer)": "2.541977"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "981.367394", "negative lookup (avg_nanosec_per_kmer)": "763.714532", "access (avg_nanosec_per_kmer)": "389.740954", "iterator (avg_nanosec_per_kmer)": "2.483201"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "991.675857", "negative lookup (avg_nanosec_per_kmer)": "770.689714", "access (avg_nanosec_per_kmer)": "388.704925", "iterator (avg_nanosec_per_kmer)": "2.546658"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "980.133409", "negative lookup (avg_nanosec_per_kmer)": "764.223274", "access (avg_nanosec_per_kmer)": "389.881518", "iterator (avg_nanosec_per_kmer)": "2.501424"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "901.917992", "negative lookup (avg_nanosec_per_kmer)": "756.057889", "access (avg_nanosec_per_kmer)": "362.369540", "iterator (avg_nanosec_per_kmer)": "2.521895"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "898.436881", "negative lookup (avg_nanosec_per_kmer)": "753.081349", "access (avg_nanosec_per_kmer)": "364.493503", "iterator (avg_nanosec_per_kmer)": "2.486766"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "899.569776", "negative lookup (avg_nanosec_per_kmer)": "756.405967", "access (avg_nanosec_per_kmer)": "360.164163", "iterator 
(avg_nanosec_per_kmer)": "2.613320"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "541.245229", "negative lookup (avg_nanosec_per_kmer)": "653.133132", "access (avg_nanosec_per_kmer)": "262.337790", "iterator (avg_nanosec_per_kmer)": "2.551707"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "541.255620", "negative lookup (avg_nanosec_per_kmer)": "645.457730", "access (avg_nanosec_per_kmer)": "266.065450", "iterator (avg_nanosec_per_kmer)": "2.564080"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "k": "31", "m": "19", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "540.294952", "negative lookup (avg_nanosec_per_kmer)": "649.277321", "access (avg_nanosec_per_kmer)": "262.223913", "iterator (avg_nanosec_per_kmer)": "2.537130"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "988.726663", "negative lookup (avg_nanosec_per_kmer)": "1040.039111", "access (avg_nanosec_per_kmer)": "684.857732", "iterator (avg_nanosec_per_kmer)": "2.550971"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "993.003917", "negative lookup (avg_nanosec_per_kmer)": "1049.810215", "access (avg_nanosec_per_kmer)": "679.543637", "iterator (avg_nanosec_per_kmer)": "2.545148"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash", "k": "31", "m": "21", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "993.312118", "negative lookup (avg_nanosec_per_kmer)": "1038.729618", "access (avg_nanosec_per_kmer)": "680.134008", 
"iterator (avg_nanosec_per_kmer)": "2.543616"} diff --git a/benchmarks/results-10-11-25/k31/regular-bench.log b/benchmarks/results-10-11-25/k31/regular-bench.log new file mode 100644 index 0000000..4e6146c --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-bench.log @@ -0,0 +1,135 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 593.468 +negative lookup (avg_nanosec_per_kmer) 669.693 +access (avg_nanosec_per_kmer) = 276.836 +iterator (avg_nanosec_per_kmer) = 2.53893 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 595.469 +negative lookup (avg_nanosec_per_kmer) 674.648 +access (avg_nanosec_per_kmer) = 277.033 +iterator (avg_nanosec_per_kmer) = 2.56758 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 595.825 +negative lookup (avg_nanosec_per_kmer) 675.655 +access (avg_nanosec_per_kmer) = 275.145 +iterator (avg_nanosec_per_kmer) = 2.54701 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 605.924 +negative lookup (avg_nanosec_per_kmer) 743.328 +access (avg_nanosec_per_kmer) = 278.247 +iterator (avg_nanosec_per_kmer) = 2.60883 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 607.135 +negative lookup (avg_nanosec_per_kmer) 735.006 +access (avg_nanosec_per_kmer) = 277.4 +iterator (avg_nanosec_per_kmer) = 2.51516 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 606.581 +negative lookup (avg_nanosec_per_kmer) 739.804 +access (avg_nanosec_per_kmer) = 279.167 +iterator (avg_nanosec_per_kmer) = 2.57102 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 813.942 +negative lookup (avg_nanosec_per_kmer) 801.527 +access 
(avg_nanosec_per_kmer) = 351.348 +iterator (avg_nanosec_per_kmer) = 2.54513 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 827.115 +negative lookup (avg_nanosec_per_kmer) 798.393 +access (avg_nanosec_per_kmer) = 379.989 +iterator (avg_nanosec_per_kmer) = 2.50945 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 815.452 +negative lookup (avg_nanosec_per_kmer) 796.055 +access (avg_nanosec_per_kmer) = 349.788 +iterator (avg_nanosec_per_kmer) = 2.52542 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1279.52 +negative lookup (avg_nanosec_per_kmer) 1140.36 +access (avg_nanosec_per_kmer) = 911.249 +iterator (avg_nanosec_per_kmer) = 2.61747 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1280.79 +negative lookup (avg_nanosec_per_kmer) 1130.03 +access (avg_nanosec_per_kmer) = 882.075 +iterator (avg_nanosec_per_kmer) = 2.52037 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1281.81 +negative lookup (avg_nanosec_per_kmer) 1135.15 +access (avg_nanosec_per_kmer) = 884.421 +iterator (avg_nanosec_per_kmer) = 2.54794 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1054.96 +negative lookup (avg_nanosec_per_kmer) 849.983 +access (avg_nanosec_per_kmer) = 546.171 +iterator (avg_nanosec_per_kmer) = 2.52994 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1053.02 +negative lookup (avg_nanosec_per_kmer) 854.228 +access (avg_nanosec_per_kmer) = 538.476 +iterator (avg_nanosec_per_kmer) = 2.49751 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 1059.84 +negative lookup 
(avg_nanosec_per_kmer) 865.469 +access (avg_nanosec_per_kmer) = 546.28 +iterator (avg_nanosec_per_kmer) = 2.54198 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 981.367 +negative lookup (avg_nanosec_per_kmer) 763.715 +access (avg_nanosec_per_kmer) = 389.741 +iterator (avg_nanosec_per_kmer) = 2.4832 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 991.676 +negative lookup (avg_nanosec_per_kmer) 770.69 +access (avg_nanosec_per_kmer) = 388.705 +iterator (avg_nanosec_per_kmer) = 2.54666 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 980.133 +negative lookup (avg_nanosec_per_kmer) 764.223 +access (avg_nanosec_per_kmer) = 389.882 +iterator (avg_nanosec_per_kmer) = 2.50142 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 901.918 +negative lookup (avg_nanosec_per_kmer) 756.058 +access (avg_nanosec_per_kmer) = 362.37 +iterator (avg_nanosec_per_kmer) = 2.5219 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 898.437 +negative lookup (avg_nanosec_per_kmer) 753.081 +access (avg_nanosec_per_kmer) = 364.494 +iterator (avg_nanosec_per_kmer) = 2.48677 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 899.57 +negative lookup (avg_nanosec_per_kmer) 756.406 +access (avg_nanosec_per_kmer) = 360.164 +iterator (avg_nanosec_per_kmer) = 2.61332 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 541.245 +negative lookup (avg_nanosec_per_kmer) 653.133 +access (avg_nanosec_per_kmer) = 262.338 +iterator (avg_nanosec_per_kmer) = 2.55171 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 541.256 
+negative lookup (avg_nanosec_per_kmer) 645.458 +access (avg_nanosec_per_kmer) = 266.065 +iterator (avg_nanosec_per_kmer) = 2.56408 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 540.295 +negative lookup (avg_nanosec_per_kmer) 649.277 +access (avg_nanosec_per_kmer) = 262.224 +iterator (avg_nanosec_per_kmer) = 2.53713 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 988.727 +negative lookup (avg_nanosec_per_kmer) 1040.04 +access (avg_nanosec_per_kmer) = 684.858 +iterator (avg_nanosec_per_kmer) = 2.55097 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 993.004 +negative lookup (avg_nanosec_per_kmer) 1049.81 +access (avg_nanosec_per_kmer) = 679.544 +iterator (avg_nanosec_per_kmer) = 2.54515 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash +positive lookup (avg_nanosec_per_kmer) = 993.312 +negative lookup (avg_nanosec_per_kmer) 1038.73 +access (avg_nanosec_per_kmer) = 680.134 +iterator (avg_nanosec_per_kmer) = 2.54362 diff --git a/benchmarks/results-10-11-25/k31/regular-build.json b/benchmarks/results-10-11-25/k31/regular-build.json new file mode 100644 index 0000000..d017d36 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-build.json @@ -0,0 +1,9 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz", "k": "31", "m": "20", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1454001", "step 2 (compute minimizer tuples)": "509596", "step 3 (merging minimizer tuples)": "3936428", "step 4 (build mphf)": "2993043", "step 5 (replacing minimizer values with MPHF hashes)": "2732530", "step 6 (merging minimizers tuples)": "10827581", "step 7.1 (build sparse index)": "920075", "step 7.2 (build skew index)": "2573416", "step 7 (build sparse and skew index)": 
"3661230", "total_build_time_in_microsec": "26114409", "index_size_in_bytes": "495332377", "num_kmers": "502465200"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz", "k": "31", "m": "20", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "2671135", "step 2 (compute minimizer tuples)": "979126", "step 3 (merging minimizer tuples)": "8609168", "step 4 (build mphf)": "6674200", "step 5 (replacing minimizer values with MPHF hashes)": "6142475", "step 6 (merging minimizers tuples)": "15774504", "step 7.1 (build sparse index)": "1474226", "step 7.2 (build skew index)": "662961", "step 7 (build sparse and skew index)": "2486478", "total_build_time_in_microsec": "43337086", "index_size_in_bytes": "1077792939", "num_kmers": "1150399205"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "7204653", "step 2 (compute minimizer tuples)": "2309928", "step 3 (merging minimizer tuples)": "21796889", "step 4 (build mphf)": "15201595", "step 5 (replacing minimizer values with MPHF hashes)": "14821673", "step 6 (merging minimizers tuples)": "47620820", "step 7.1 (build sparse index)": "4960098", "step 7.2 (build skew index)": "7003843", "step 7 (build sparse and skew index)": "12842196", "total_build_time_in_microsec": "121797754", "index_size_in_bytes": "2716053224", "num_kmers": "2505678680"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "74035635", "step 2 (compute minimizer tuples)": "85579820", "step 3 (merging minimizer tuples)": "1099537910", "step 4 (build mphf)": "206294431", "step 5 (replacing minimizer values with MPHF hashes)": "109116591", "step 6 (merging minimizers tuples)": "415629995", "step 7.1 (build sparse index)": "54969610", 
"step 7.2 (build skew index)": "33949847", "step 7 (build sparse and skew index)": "96180453", "total_build_time_in_microsec": "2086374835", "index_size_in_bytes": "22620878165", "num_kmers": "17987935180"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "17585651", "step 2 (compute minimizer tuples)": "6549516", "step 3 (merging minimizer tuples)": "57673000", "step 4 (build mphf)": "21101231", "step 5 (replacing minimizer values with MPHF hashes)": "22471693", "step 6 (merging minimizers tuples)": "95446722", "step 7.1 (build sparse index)": "11220269", "step 7.2 (build skew index)": "15014989", "step 7 (build sparse and skew index)": "27857986", "total_build_time_in_microsec": "248685799", "index_size_in_bytes": "4810783166", "num_kmers": "3718120949"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "6086742", "step 2 (compute minimizer tuples)": "1230828", "step 3 (merging minimizer tuples)": "10647280", "step 4 (build mphf)": "6366655", "step 5 (replacing minimizer values with MPHF hashes)": "6977114", "step 6 (merging minimizers tuples)": "29607621", "step 7.1 (build sparse index)": "3929955", "step 7.2 (build skew index)": "6081166", "step 7 (build sparse and skew index)": "10543780", "total_build_time_in_microsec": "71460020", "index_size_in_bytes": "1416509371", "num_kmers": "1111018845"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "5393718", "step 2 (compute minimizer tuples)": "1021948", "step 3 (merging minimizer tuples)": "8541526", "step 4 (build mphf)": "5257783", "step 5 (replacing minimizer values with MPHF hashes)": "5627145", "step 6 (merging 
minimizers tuples)": "12275385", "step 7.1 (build sparse index)": "3117678", "step 7.2 (build skew index)": "3545548", "step 7 (build sparse and skew index)": "7088806", "total_build_time_in_microsec": "45206311", "index_size_in_bytes": "1137030140", "num_kmers": "894310084"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz", "k": "31", "m": "19", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1176293", "step 2 (compute minimizer tuples)": "487854", "step 3 (merging minimizer tuples)": "2746448", "step 4 (build mphf)": "2352642", "step 5 (replacing minimizer values with MPHF hashes)": "2009973", "step 6 (merging minimizers tuples)": "3772587", "step 7.1 (build sparse index)": "646190", "step 7.2 (build skew index)": "27902", "step 7 (build sparse and skew index)": "806121", "total_build_time_in_microsec": "13351918", "index_size_in_bytes": "346391727", "num_kmers": "376205185"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz", "k": "31", "m": "21", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "40467955", "step 2 (compute minimizer tuples)": "13547658", "step 3 (merging minimizer tuples)": "157576640", "step 4 (build mphf)": "172891710", "step 5 (replacing minimizer values with MPHF hashes)": "150310190", "step 6 (merging minimizers tuples)": "239906881", "step 7.1 (build sparse index)": "26834532", "step 7.2 (build skew index)": "4793222", "step 7 (build sparse and skew index)": "36123229", "total_build_time_in_microsec": "810824263", "index_size_in_bytes": "14122834359", "num_kmers": "12319840464"} diff --git a/benchmarks/results-10-11-25/k31/regular-build.log b/benchmarks/results-10-11-25/k31/regular-build.log new file mode 100644 index 0000000..9cae9e4 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-build.log @@ -0,0 +1,2710 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 
31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash +2025-11-10 22:50:18: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... +read 1000000 sequences, 260758668 bases, 230758668 kmers +read 2000000 sequences, 549832064 bases, 489832064 kmers +read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +cost: 2.0 + 0.245658 [bits/kmer] +max string length = 31415 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 15 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.454 [sec] (2.89373 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.509596 [sec] (1.01419 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.bin' +=== step 3 (merging minimizer tuples): 3.93643 [sec] (7.83423 [ns/kmer]) +num_minimizers = 72381146 +num_minimizer_positions = 78455681 +num_super_kmers = 78455681 +building minimizers MPHF with 64 threads and 25 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.99304 [sec] (5.95672 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811418032067371.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 2.73253 [sec] (5.43825 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.8276 [sec] (21.5489 [ns/kmer]) +num_bits_per_offset = 30 +num_buckets_larger_than_1_not_in_skew_index 2259768/72381146 (3.12204%) +num_buckets_in_skew_index 4215/72381146 (0.00582334%) +max_bucket_size 70346 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 7357522/78455681 (9.37793%) +num_minimizer_positions_of_buckets_in_skew_index 980996/78455681 (1.25038%) +=== step 7.1 (build sparse index): 0.920075 [sec] (1.83112 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 1260546 + partition = 1: num kmers in buckets of size > 128 and <= 256: 994175 + partition = 2: num kmers in buckets of size > 256 and <= 512: 665719 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 408608 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 423234 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 271419 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 288814 + partition = 7: num kmers in buckets of size > 8192 and <= 70346: 428378 +num kmers in skew index = 4740893 (0.943527%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1260546 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1260546)... + built mphs[0] for 1260546 kmers; bits/key = 2.56076 + built positions[0] for 1260546 kmers; bits/key = 7.00029 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 994175 + building MPHF with 64 threads and 1 partitions (avg. partition size = 994175)... + built mphs[1] for 994175 kmers; bits/key = 2.41831 + built positions[1] for 994175 kmers; bits/key = 8.00033 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 665719 + building MPHF with 64 threads and 1 partitions (avg. partition size = 665719)... 
+ built mphs[2] for 665719 kmers; bits/key = 2.41938 + built positions[2] for 665719 kmers; bits/key = 9.00051 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 408608 + building MPHF with 64 threads and 1 partitions (avg. partition size = 408608)... + built mphs[3] for 408608 kmers; bits/key = 2.42149 + built positions[3] for 408608 kmers; bits/key = 10.0008 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 423234 + building MPHF with 64 threads and 1 partitions (avg. partition size = 423234)... + built mphs[4] for 423234 kmers; bits/key = 2.56407 + built positions[4] for 423234 kmers; bits/key = 11.0009 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 271419 + building MPHF with 64 threads and 1 partitions (avg. partition size = 271419)... + built mphs[5] for 271419 kmers; bits/key = 2.42412 + built positions[5] for 271419 kmers; bits/key = 12.0014 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 288814 + building MPHF with 64 threads and 1 partitions (avg. partition size = 288814)... + built mphs[6] for 288814 kmers; bits/key = 2.42387 + built positions[6] for 288814 kmers; bits/key = 13.0013 + lower = 8192; upper = 70346; num_bits_per_pos = 17; num_kmers_in_partition = 428378 + building MPHF with 64 threads and 1 partitions (avg. partition size = 428378)... 
+ built mphs[7] for 428378 kmers; bits/key = 2.56398 + built positions[7] for 428378 kmers; bits/key = 17.0008 +=== step 7.2 (build skew index): 2.57342 [sec] (5.12158 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.8721% +buckets with 2 minimizer positions = 2.00417% +buckets with 3 minimizer positions = 0.51765% +buckets with 4 minimizer positions = 0.211311% +buckets with 5 minimizer positions = 0.109826% +buckets with 6 minimizer positions = 0.0661802% +buckets with 7 minimizer positions = 0.0437061% +buckets with 8 minimizer positions = 0.0308464% +buckets with 9 minimizer positions = 0.0235614% +buckets with 10 minimizer positions = 0.0177228% +buckets with 11 minimizer positions = 0.0139967% +buckets with 12 minimizer positions = 0.0112902% +buckets with 13 minimizer positions = 0.00931044% +buckets with 14 minimizer positions = 0.00771748% +buckets with 15 minimizer positions = 0.00632347% +buckets with 16 minimizer positions = 0.00528591% +max_bucket_size = 70346 +=== step 7 (build sparse and skew index): 3.66123 [sec] (7.28653 [ns/kmer]) +=== total time: 26.1144 [sec] (51.9726 [ns/kmer]) +total index size: 495332377 [B] -- 495.332 [MB] +SPACE BREAKDOWN: + mphf: 0.41828 [bits/kmer] (2.90367 [bits/key]) -- 5.30379% + strings_offsets: 0.144419 [bits/kmer] -- 1.83123% + control_codewords: 4.46561 [bits/kmer] -- 56.624% + mid_load_buckets: 0.439286 [bits/kmer] -- 5.57015% + begin_buckets_of_size: 4.26696e-06 [bits/kmer] -- 5.41051e-05% + strings: 2.24566 [bits/kmer] -- 28.4749% + skew_index: 0.173169 [bits/kmer] -- 2.19578% + weights: 2.92956e-06 [bits/kmer] -- 3.71468e-05% + -------------- + total: 7.88643 [bits/kmer] +2025-11-10 22:50:44: saving data structure to disk... 
+2025-11-10 22:50:46: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash +2025-11-10 22:50:46: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +cost: 2.0 + 0.0303995 [bits/kmer] +max string length = 111973 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 20 +=== step 1 (encode strings): 2.67114 [sec] (2.32192 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.979126 [sec] (0.851118 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.60917 [sec] (7.48364 [ns/kmer]) +num_minimizers = 173258591 +num_minimizer_positions = 175959772 +num_super_kmers = 175959772 +building minimizers MPHF with 64 threads and 58 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.6742 [sec] (5.80164 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811446407881372.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 6.14248 [sec] (5.33943 [ns/kmer]) +=== step 6 (merging minimizers tuples): 15.7745 [sec] (13.7122 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 1311512/173258591 (0.756968%) +num_buckets_in_skew_index 1541/173258591 (0.000889422%) +max_bucket_size 3747 +log2_max_bucket_size 12 +num_partitions in skew index 6 +num_minimizer_positions_of_buckets_larger_than_1 3806575/175959772 (2.16332%) +num_minimizer_positions_of_buckets_in_skew_index 207659/175959772 (0.118015%) +=== step 7.1 (build sparse index): 1.47423 [sec] (1.28149 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 559507 + partition = 1: num kmers in buckets of size > 128 and <= 256: 317132 + partition = 2: num kmers in buckets of size > 256 and <= 512: 144139 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 106543 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 21308 + partition = 5: num kmers in buckets of size > 2048 and <= 3747: 36738 +num kmers in skew index = 1185367 (0.10304%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 559507 + building MPHF with 64 threads and 1 partitions (avg. partition size = 559507)... + built mphs[0] for 559507 kmers; bits/key = 2.41999 + built positions[0] for 559507 kmers; bits/key = 7.00068 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 317132 + building MPHF with 64 threads and 1 partitions (avg. partition size = 317132)... + built mphs[1] for 317132 kmers; bits/key = 2.42276 + built positions[1] for 317132 kmers; bits/key = 8.00111 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 144139 + building MPHF with 64 threads and 1 partitions (avg. partition size = 144139)... 
+ built mphs[2] for 144139 kmers; bits/key = 2.43099 + built positions[2] for 144139 kmers; bits/key = 9.00242 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 106543 + building MPHF with 64 threads and 1 partitions (avg. partition size = 106543)... + built mphs[3] for 106543 kmers; bits/key = 2.43597 + built positions[3] for 106543 kmers; bits/key = 10.0034 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 21308 + building MPHF with 64 threads and 1 partitions (avg. partition size = 21308)... + built mphs[4] for 21308 kmers; bits/key = 2.51323 + built positions[4] for 21308 kmers; bits/key = 11.0171 + lower = 2048; upper = 3747; num_bits_per_pos = 12; num_kmers_in_partition = 36738 + building MPHF with 64 threads and 1 partitions (avg. partition size = 36738)... + built mphs[5] for 36738 kmers; bits/key = 2.4746 + built positions[5] for 36738 kmers; bits/key = 12.0098 +=== step 7.2 (build skew index): 0.662961 [sec] (0.576288 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.2421% +buckets with 2 minimizer positions = 0.571193% +buckets with 3 minimizer positions = 0.0840564% +buckets with 4 minimizer positions = 0.0345576% +buckets with 5 minimizer positions = 0.0184735% +buckets with 6 minimizer positions = 0.011514% +buckets with 7 minimizer positions = 0.00772718% +buckets with 8 minimizer positions = 0.00537809% +buckets with 9 minimizer positions = 0.00392246% +buckets with 10 minimizer positions = 0.00304862% +buckets with 11 minimizer positions = 0.00236583% +buckets with 12 minimizer positions = 0.0019139% +buckets with 13 minimizer positions = 0.00156356% +buckets with 14 minimizer positions = 0.00127613% +buckets with 15 minimizer positions = 0.00105276% +buckets with 16 minimizer positions = 0.000939636% +max_bucket_size = 3747 +=== step 7 (build sparse and skew index): 2.48648 [sec] (2.1614 [ns/kmer]) +=== total time: 43.3371 [sec] (37.6713 [ns/kmer]) +total 
index size: 1077792939 [B] -- 1077.79 [MB] +SPACE BREAKDOWN: + mphf: 0.426302 [bits/kmer] (2.83055 [bits/key]) -- 5.68775% + strings_offsets: 0.100021 [bits/kmer] -- 1.33449% + control_codewords: 4.81944 [bits/kmer] -- 64.3013% + mid_load_buckets: 0.102577 [bits/kmer] -- 1.36859% + begin_buckets_of_size: 1.8637e-06 [bits/kmer] -- 2.48656e-05% + strings: 2.0304 [bits/kmer] -- 27.0897% + skew_index: 0.0163494 [bits/kmer] -- 0.218135% + weights: 1.27956e-06 [bits/kmer] -- 1.70719e-05% + -------------- + total: 7.49509 [bits/kmer] +2025-11-10 22:51:29: saving data structure to disk... +2025-11-10 22:51:34: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash +2025-11-10 22:51:34: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +read 1000000 sequences, 245137036 bases, 215137036 kmers +read 2000000 sequences, 492922311 bases, 432922311 kmers +read 3000000 sequences, 743136741 bases, 653136741 kmers +read 4000000 sequences, 997901947 bases, 877901947 kmers +read 5000000 sequences, 1259012338 bases, 1109012338 kmers +read 6000000 sequences, 1534166192 bases, 1354166192 kmers +read 7000000 sequences, 1835088477 bases, 1625088477 kmers +read 8000000 sequences, 2224822737 bases, 1984822737 kmers +read 9000000 sequences, 2506972380 bases, 2236972380 kmers +read 10000000 sequences, 2751909556 bases, 2451909556 kmers +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +cost: 2.0 + 0.245454 [bits/kmer] +max string length = 35848 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 16 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.20465 [sec] (2.87533 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.0.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.19.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.38.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.57.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.72.bin'... 
+=== step 2 (compute minimizer tuples): 2.30993 [sec] (0.921877 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 21.7969 [sec] (8.699 [ns/kmer]) +num_minimizers = 386687326 +num_minimizer_positions = 423023926 +num_super_kmers = 423023926 +building minimizers MPHF with 64 threads and 129 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 15.2016 [sec] (6.06686 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811494198577529.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 14.8217 [sec] (5.91523 [ns/kmer]) +=== step 6 (merging minimizers tuples): 47.6208 [sec] (19.0052 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 10816752/386687326 (2.79729%) +num_buckets_in_skew_index 42372/386687326 (0.0109577%) +max_bucket_size 22972 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 40422973/423023926 (9.55572%) +num_minimizer_positions_of_buckets_in_skew_index 6772751/423023926 (1.60103%) +=== step 7.1 (build sparse index): 4.9601 [sec] (1.97954 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 11807213 + partition = 1: num kmers in buckets of size > 128 and <= 256: 8389556 + partition = 2: num kmers in buckets of size > 256 and <= 512: 5343660 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3076413 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1855446 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1008178 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 375770 + partition = 7: num kmers in buckets of size > 8192 and <= 22972: 207510 +num kmers in skew index = 32063746 
(1.27964%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 11807213 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[0] for 11807213 kmers; bits/key = 2.55841 + built positions[0] for 11807213 kmers; bits/key = 7.00003 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8389556 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[1] for 8389556 kmers; bits/key = 2.61352 + built positions[1] for 8389556 kmers; bits/key = 8.00004 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5343660 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 5343660 kmers; bits/key = 2.66261 + built positions[2] for 5343660 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3076413 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3076413 kmers; bits/key = 3.34298 + built positions[3] for 3076413 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1855446 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1855446)... + built mphs[4] for 1855446 kmers; bits/key = 2.56022 + built positions[4] for 1855446 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1008178 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1008178)... + built mphs[5] for 1008178 kmers; bits/key = 2.41829 + built positions[5] for 1008178 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 375770 + building MPHF with 64 threads and 1 partitions (avg. partition size = 375770)... 
+ built mphs[6] for 375770 kmers; bits/key = 2.42182 + built positions[6] for 375770 kmers; bits/key = 13.001 + lower = 8192; upper = 22972; num_bits_per_pos = 15; num_kmers_in_partition = 207510 + building MPHF with 64 threads and 1 partitions (avg. partition size = 207510)... + built mphs[7] for 207510 kmers; bits/key = 2.42618 + built positions[7] for 207510 kmers; bits/key = 15.0018 +=== step 7.2 (build skew index): 7.00384 [sec] (2.79519 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.1918% +buckets with 2 minimizer positions = 1.69205% +buckets with 3 minimizer positions = 0.442612% +buckets with 4 minimizer positions = 0.201662% +buckets with 5 minimizer positions = 0.114623% +buckets with 6 minimizer positions = 0.0734475% +buckets with 7 minimizer positions = 0.0506719% +buckets with 8 minimizer positions = 0.0368406% +buckets with 9 minimizer positions = 0.0280203% +buckets with 10 minimizer positions = 0.0218197% +buckets with 11 minimizer positions = 0.0175108% +buckets with 12 minimizer positions = 0.0142045% +buckets with 13 minimizer positions = 0.0116337% +buckets with 14 minimizer positions = 0.00980818% +buckets with 15 minimizer positions = 0.00832637% +buckets with 16 minimizer positions = 0.00717712% +max_bucket_size = 22972 +=== step 7 (build sparse and skew index): 12.8422 [sec] (5.12524 [ns/kmer]) +=== total time: 121.798 [sec] (48.6087 [ns/kmer]) +total index size: 2716053224 [B] -- 2716.05 [MB] +SPACE BREAKDOWN: + mphf: 0.436199 [bits/kmer] (2.8265 [bits/key]) -- 5.03015% + strings_offsets: 0.153147 [bits/kmer] -- 1.76606% + control_codewords: 5.0927 [bits/kmer] -- 58.7281% + mid_load_buckets: 0.516242 [bits/kmer] -- 5.95319% + begin_buckets_of_size: 8.55656e-07 [bits/kmer] -- 9.86726e-06% + strings: 2.24545 [bits/kmer] -- 25.8941% + skew_index: 0.227926 [bits/kmer] -- 2.6284% + weights: 5.87466e-07 [bits/kmer] -- 6.77454e-06% + -------------- + total: 8.67167 [bits/kmer] +2025-11-10 22:53:35: saving 
data structure to disk... +2025-11-10 22:53:48: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash +2025-11-10 22:53:48: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz'... +read 1000000 sequences, 129490969 bases, 99490969 kmers +read 2000000 sequences, 259406781 bases, 199406781 kmers +read 3000000 sequences, 390247383 bases, 300247383 kmers +read 4000000 sequences, 521879077 bases, 401879077 kmers +read 5000000 sequences, 654383925 bases, 504383925 kmers +read 6000000 sequences, 787748040 bases, 607748040 kmers +read 7000000 sequences, 921978207 bases, 711978207 kmers +read 8000000 sequences, 1056572544 bases, 816572544 kmers +read 9000000 sequences, 1192074067 bases, 922074067 kmers +read 10000000 sequences, 1328585611 bases, 1028585611 kmers +read 11000000 sequences, 1466068193 bases, 1136068193 kmers +read 12000000 sequences, 1604189526 bases, 1244189526 kmers +read 13000000 sequences, 1743096378 bases, 1353096378 kmers +read 14000000 sequences, 1882900221 bases, 1462900221 kmers +read 15000000 sequences, 2023699884 bases, 1573699884 kmers +read 16000000 sequences, 2165862057 bases, 1685862057 kmers +read 17000000 sequences, 2309661964 bases, 1799661964 kmers +read 18000000 sequences, 2454684974 bases, 1914684974 kmers +read 19000000 sequences, 2601025569 bases, 2031025569 kmers +read 20000000 sequences, 2749275100 bases, 2149275100 kmers +read 21000000 sequences, 2898804831 bases, 2268804831 kmers +read 22000000 sequences, 3049858713 bases, 2389858713 kmers +read 23000000 sequences, 3203100307 bases, 2513100307 kmers +read 24000000 sequences, 3357729055 bases, 2637729055 kmers +read 25000000 sequences, 3514381494 bases, 2764381494 kmers +read 26000000 sequences, 3673423010 bases, 2893423010 kmers +read 27000000 sequences, 3833876723 bases, 3023876723 kmers 
+read 28000000 sequences, 3997541031 bases, 3157541031 kmers +read 29000000 sequences, 4163233674 bases, 3293233674 kmers +read 30000000 sequences, 4331783042 bases, 3431783042 kmers +read 31000000 sequences, 4504035303 bases, 3574035303 kmers +read 32000000 sequences, 4679090399 bases, 3719090399 kmers +read 33000000 sequences, 4858436877 bases, 3868436877 kmers +read 34000000 sequences, 5041637740 bases, 4021637740 kmers +read 35000000 sequences, 5229976324 bases, 4179976324 kmers +read 36000000 sequences, 5423490866 bases, 4343490866 kmers +read 37000000 sequences, 5622911177 bases, 4512911177 kmers +read 38000000 sequences, 5828307256 bases, 4688307256 kmers +read 39000000 sequences, 6041904282 bases, 4871904282 kmers +read 40000000 sequences, 6264018359 bases, 5064018359 kmers +read 41000000 sequences, 6497175042 bases, 5267175042 kmers +read 42000000 sequences, 6743365189 bases, 5483365189 kmers +read 43000000 sequences, 7005667896 bases, 5715667896 kmers +read 44000000 sequences, 7289398126 bases, 5969398126 kmers +read 45000000 sequences, 7600998045 bases, 6250998045 kmers +read 46000000 sequences, 7846412811 bases, 6466412811 kmers +read 47000000 sequences, 7950267050 bases, 6540267050 kmers +read 48000000 sequences, 8054449490 bases, 6614449490 kmers +read 49000000 sequences, 8158503893 bases, 6688503893 kmers +read 50000000 sequences, 8262623298 bases, 6762623298 kmers +read 51000000 sequences, 8366491419 bases, 6836491419 kmers +read 52000000 sequences, 8470360694 bases, 6910360694 kmers +read 53000000 sequences, 8574554064 bases, 6984554064 kmers +read 54000000 sequences, 8678752038 bases, 7058752038 kmers +read 55000000 sequences, 8783318704 bases, 7133318704 kmers +read 56000000 sequences, 8887348929 bases, 7207348929 kmers +read 57000000 sequences, 8991244574 bases, 7281244574 kmers +read 58000000 sequences, 9095727588 bases, 7355727588 kmers +read 59000000 sequences, 9200181084 bases, 7430181084 kmers +read 60000000 sequences, 9304538500 bases, 
7504538500 kmers +read 61000000 sequences, 9409280029 bases, 7579280029 kmers +read 62000000 sequences, 9513939845 bases, 7653939845 kmers +read 63000000 sequences, 9618472370 bases, 7728472370 kmers +read 64000000 sequences, 9723234224 bases, 7803234224 kmers +read 65000000 sequences, 9827751954 bases, 7877751954 kmers +read 66000000 sequences, 9932020550 bases, 7952020550 kmers +read 67000000 sequences, 10036741400 bases, 8026741400 kmers +read 68000000 sequences, 10141739679 bases, 8101739679 kmers +read 69000000 sequences, 10246720968 bases, 8176720968 kmers +read 70000000 sequences, 10351924281 bases, 8251924281 kmers +read 71000000 sequences, 10456872741 bases, 8326872741 kmers +read 72000000 sequences, 10562347711 bases, 8402347711 kmers +read 73000000 sequences, 10667420487 bases, 8477420487 kmers +read 74000000 sequences, 10772671579 bases, 8552671579 kmers +read 75000000 sequences, 10877868233 bases, 8627868233 kmers +read 76000000 sequences, 10983492844 bases, 8703492844 kmers +read 77000000 sequences, 11089073361 bases, 8779073361 kmers +read 78000000 sequences, 11194470848 bases, 8854470848 kmers +read 79000000 sequences, 11300332559 bases, 8930332559 kmers +read 80000000 sequences, 11406269822 bases, 9006269822 kmers +read 81000000 sequences, 11512162907 bases, 9082162907 kmers +read 82000000 sequences, 11618219813 bases, 9158219813 kmers +read 83000000 sequences, 11724078742 bases, 9234078742 kmers +read 84000000 sequences, 11830014073 bases, 9310014073 kmers +read 85000000 sequences, 11935890790 bases, 9385890790 kmers +read 86000000 sequences, 12042232134 bases, 9462232134 kmers +read 87000000 sequences, 12148382832 bases, 9538382832 kmers +read 88000000 sequences, 12254822411 bases, 9614822411 kmers +read 89000000 sequences, 12361563673 bases, 9691563673 kmers +read 90000000 sequences, 12468297930 bases, 9768297930 kmers +read 91000000 sequences, 12574912907 bases, 9844912907 kmers +read 92000000 sequences, 12681494065 bases, 9921494065 kmers 
+read 93000000 sequences, 12788194017 bases, 9998194017 kmers +read 94000000 sequences, 12894915381 bases, 10074915381 kmers +read 95000000 sequences, 13001521631 bases, 10151521631 kmers +read 96000000 sequences, 13109064835 bases, 10229064835 kmers +read 97000000 sequences, 13215873113 bases, 10305873113 kmers +read 98000000 sequences, 13323125980 bases, 10383125980 kmers +read 99000000 sequences, 13430215641 bases, 10460215641 kmers +read 100000000 sequences, 13537778344 bases, 10537778344 kmers +read 101000000 sequences, 13645148965 bases, 10615148965 kmers +read 102000000 sequences, 13752520277 bases, 10692520277 kmers +read 103000000 sequences, 13860127249 bases, 10770127249 kmers +read 104000000 sequences, 13968075956 bases, 10848075956 kmers +read 105000000 sequences, 14075861146 bases, 10925861146 kmers +read 106000000 sequences, 14184298738 bases, 11004298738 kmers +read 107000000 sequences, 14292530270 bases, 11082530270 kmers +read 108000000 sequences, 14400943968 bases, 11160943968 kmers +read 109000000 sequences, 14509339935 bases, 11239339935 kmers +read 110000000 sequences, 14617599335 bases, 11317599335 kmers +read 111000000 sequences, 14725842174 bases, 11395842174 kmers +read 112000000 sequences, 14834254989 bases, 11474254989 kmers +read 113000000 sequences, 14942804338 bases, 11552804338 kmers +read 114000000 sequences, 15051637733 bases, 11631637733 kmers +read 115000000 sequences, 15160739429 bases, 11710739429 kmers +read 116000000 sequences, 15269978487 bases, 11789978487 kmers +read 117000000 sequences, 15378990148 bases, 11868990148 kmers +read 118000000 sequences, 15488236558 bases, 11948236558 kmers +read 119000000 sequences, 15598141514 bases, 12028141514 kmers +read 120000000 sequences, 15707567895 bases, 12107567895 kmers +read 121000000 sequences, 15817378418 bases, 12187378418 kmers +read 122000000 sequences, 15927205756 bases, 12267205756 kmers +read 123000000 sequences, 16037448540 bases, 12347448540 kmers +read 124000000 
sequences, 16147936166 bases, 12427936166 kmers +read 125000000 sequences, 16258467588 bases, 12508467588 kmers +read 126000000 sequences, 16368795492 bases, 12588795492 kmers +read 127000000 sequences, 16479785214 bases, 12669785214 kmers +read 128000000 sequences, 16590282249 bases, 12750282249 kmers +read 129000000 sequences, 16701057677 bases, 12831057677 kmers +read 130000000 sequences, 16812553081 bases, 12912553081 kmers +read 131000000 sequences, 16923449047 bases, 12993449047 kmers +read 132000000 sequences, 17034230526 bases, 13074230526 kmers +read 133000000 sequences, 17145713815 bases, 13155713815 kmers +read 134000000 sequences, 17257389525 bases, 13237389525 kmers +read 135000000 sequences, 17369130838 bases, 13319130838 kmers +read 136000000 sequences, 17481314596 bases, 13401314596 kmers +read 137000000 sequences, 17593629072 bases, 13483629072 kmers +read 138000000 sequences, 17706229726 bases, 13566229726 kmers +read 139000000 sequences, 17818270886 bases, 13648270886 kmers +read 140000000 sequences, 17931420451 bases, 13731420451 kmers +read 141000000 sequences, 18044353871 bases, 13814353871 kmers +read 142000000 sequences, 18157903240 bases, 13897903240 kmers +read 143000000 sequences, 18271415292 bases, 13981415292 kmers +read 144000000 sequences, 18384555504 bases, 14064555504 kmers +read 145000000 sequences, 18498314118 bases, 14148314118 kmers +read 146000000 sequences, 18612140169 bases, 14232140169 kmers +read 147000000 sequences, 18726166960 bases, 14316166960 kmers +read 148000000 sequences, 18840350948 bases, 14400350948 kmers +read 149000000 sequences, 18954724883 bases, 14484724883 kmers +read 150000000 sequences, 19069173447 bases, 14569173447 kmers +read 151000000 sequences, 19183793062 bases, 14653793062 kmers +read 152000000 sequences, 19298944468 bases, 14738944468 kmers +read 153000000 sequences, 19414656615 bases, 14824656615 kmers +read 154000000 sequences, 19530428728 bases, 14910428728 kmers +read 155000000 sequences, 
19646614327 bases, 14996614327 kmers +read 156000000 sequences, 19762624488 bases, 15082624488 kmers +read 157000000 sequences, 19879115632 bases, 15169115632 kmers +read 158000000 sequences, 19995793294 bases, 15255793294 kmers +read 159000000 sequences, 20112771576 bases, 15342771576 kmers +read 160000000 sequences, 20230295571 bases, 15430295571 kmers +read 161000000 sequences, 20347943202 bases, 15517943202 kmers +read 162000000 sequences, 20465629376 bases, 15605629376 kmers +read 163000000 sequences, 20583555678 bases, 15693555678 kmers +read 164000000 sequences, 20701871322 bases, 15781871322 kmers +read 165000000 sequences, 20820410777 bases, 15870410777 kmers +read 166000000 sequences, 20939977253 bases, 15959977253 kmers +read 167000000 sequences, 21059538950 bases, 16049538950 kmers +read 168000000 sequences, 21179653875 bases, 16139653875 kmers +read 169000000 sequences, 21300125826 bases, 16230125826 kmers +read 170000000 sequences, 21421275644 bases, 16321275644 kmers +read 171000000 sequences, 21542230532 bases, 16412230532 kmers +read 172000000 sequences, 21663705881 bases, 16503705881 kmers +read 173000000 sequences, 21785083912 bases, 16595083912 kmers +read 174000000 sequences, 21907256668 bases, 16687256668 kmers +read 175000000 sequences, 22029743385 bases, 16779743385 kmers +read 176000000 sequences, 22152051369 bases, 16872051369 kmers +read 177000000 sequences, 22275733581 bases, 16965733581 kmers +read 178000000 sequences, 22399701749 bases, 17059701749 kmers +read 179000000 sequences, 22524206009 bases, 17154206009 kmers +read 180000000 sequences, 22648755725 bases, 17248755725 kmers +read 181000000 sequences, 22773928274 bases, 17343928274 kmers +read 182000000 sequences, 22899987762 bases, 17439987762 kmers +read 183000000 sequences, 23025485159 bases, 17535485159 kmers +read 184000000 sequences, 23152188885 bases, 17632188885 kmers +read 185000000 sequences, 23279392102 bases, 17729392102 kmers +read 186000000 sequences, 23406527093 
bases, 17826527093 kmers +read 187000000 sequences, 23534989577 bases, 17924989577 kmers +read 187636048 sequences, 23617016620 bases, 17987935180 kmers +num_kmers 17987935180 +cost: 2.0 + 0.625873 [bits/kmer] +max string length = 38851 +num bits per_absolute_offset = 35 +num bits per_relative_offset = 16 +num bits per_string_id = 28 +=== step 1 (encode strings): 74.0356 [sec] (4.11585 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.14.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.33.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.52.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.71.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.90.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.109.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.128.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.143.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.144.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.147.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.162.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.163.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.166.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.181.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.182.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.185.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.200.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.201.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.204.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.206.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.207.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.208.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.209.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.210.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.211.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.212.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.213.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.214.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.215.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.216.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.217.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.218.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.219.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.220.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.221.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.222.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.223.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.224.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.225.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.226.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.227.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.228.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.229.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.230.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.231.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.232.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.233.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.234.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.235.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.236.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.237.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.238.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.239.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.240.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.241.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.242.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.243.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.244.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.245.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.246.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.247.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.248.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.249.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.250.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.251.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.252.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.253.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.254.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.255.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.256.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.257.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.258.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.259.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.260.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.261.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.262.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.263.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.264.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.265.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.266.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.267.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.268.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.269.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.270.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.271.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.272.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.273.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.274.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.275.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.276.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.277.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.278.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.279.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.280.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.281.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.282.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.283.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.284.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.285.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.286.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.287.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.288.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.289.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.290.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.291.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.292.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.293.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.294.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.295.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.296.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.297.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.298.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.299.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.300.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.301.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.302.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.303.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.304.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.305.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.306.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.307.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.308.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.309.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.310.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.311.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.312.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.313.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.314.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.315.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.316.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.317.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.318.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.319.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.320.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.321.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.322.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.323.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.324.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.325.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.326.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.327.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.328.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.329.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.330.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.331.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.332.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.333.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.334.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.335.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.336.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.337.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.338.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.339.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.340.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.341.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.342.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.343.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.344.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.345.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.346.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.347.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.348.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.349.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.350.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.351.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.352.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.353.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.354.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.355.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.356.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.357.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.358.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.359.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.360.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.361.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.362.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.363.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.364.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.365.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.366.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.367.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.368.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.369.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.370.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.371.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.372.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.373.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.374.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.375.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.376.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.377.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.378.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.379.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.380.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.381.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.382.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.383.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.384.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.385.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.386.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.387.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.388.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.389.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.390.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.391.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.392.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.393.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.394.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.395.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.396.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.397.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.398.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.399.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.400.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.401.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.402.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.403.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.404.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.405.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.406.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.407.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.408.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.409.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.410.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.411.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.412.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.413.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.414.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.415.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.416.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.417.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.418.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.419.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.420.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.421.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.422.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.423.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.424.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.425.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.426.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.427.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.428.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.429.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.430.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.431.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.432.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.433.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.434.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.435.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.436.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.437.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.438.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.439.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.440.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.441.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.442.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.443.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.444.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.445.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.446.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.447.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.448.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.449.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.450.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.451.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.452.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.453.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.454.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.455.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.456.bin'... +=== step 2 (compute minimizer tuples): 85.5798 [sec] (4.75762 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer tuples +processed 2300000000 minimizer tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +processed 2700000000 minimizer tuples +processed 2800000000 minimizer tuples +processed 2900000000 minimizer tuples +processed 3000000000 minimizer tuples +processed 3100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 1099.54 [sec] (61.1264 [ns/kmer]) +num_minimizers = 2586967125 
+num_minimizer_positions = 3137471712 +num_super_kmers = 3137471712 +building minimizers MPHF with 64 threads and 863 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 206.294 [sec] (11.4685 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.6.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 109.117 [sec] (6.0661 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762811628522837389.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +processed 2100000000 minimizer tuples +processed 2200000000 minimizer tuples +processed 2300000000 minimizer 
tuples +processed 2400000000 minimizer tuples +processed 2500000000 minimizer tuples +processed 2600000000 minimizer tuples +processed 2700000000 minimizer tuples +processed 2800000000 minimizer tuples +processed 2900000000 minimizer tuples +processed 3000000000 minimizer tuples +processed 3100000000 minimizer tuples +=== step 6 (merging minimizers tuples): 415.63 [sec] (23.106 [ns/kmer]) +num_bits_per_offset = 35 +num_buckets_larger_than_1_not_in_skew_index 201722347/2586967125 (7.79764%) +num_buckets_in_skew_index 377105/2586967125 (0.0145771%) +max_bucket_size 63221 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 705347646/3137471712 (22.4814%) +num_minimizer_positions_of_buckets_in_skew_index 47256393/3137471712 (1.50619%) +=== step 7.1 (build sparse index): 54.9696 [sec] (3.05592 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 118809610 + partition = 1: num kmers in buckets of size > 128 and <= 256: 63637532 + partition = 2: num kmers in buckets of size > 256 and <= 512: 28814382 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 11075511 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4175522 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1800377 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 653242 + partition = 7: num kmers in buckets of size > 8192 and <= 63221: 493725 +num kmers in skew index = 229459901 (1.27563%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 118809610 + building MPHF with 64 threads and 40 partitions (avg. partition size = 3000000)... + built mphs[0] for 118809610 kmers; bits/key = 2.56673 + built positions[0] for 118809610 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 63637532 + building MPHF with 64 threads and 22 partitions (avg. partition size = 3000000)... 
+ built mphs[1] for 63637532 kmers; bits/key = 2.51805 + built positions[1] for 63637532 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 28814382 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[2] for 28814382 kmers; bits/key = 2.5289 + built positions[2] for 28814382 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 11075511 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[3] for 11075511 kmers; bits/key = 2.58384 + built positions[3] for 11075511 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4175522 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4175522 kmers; bits/key = 2.88051 + built positions[4] for 4175522 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1800377 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1800377)... + built mphs[5] for 1800377 kmers; bits/key = 2.56025 + built positions[5] for 1800377 kmers; bits/key = 12.0002 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 653242 + building MPHF with 64 threads and 1 partitions (avg. partition size = 653242)... + built mphs[6] for 653242 kmers; bits/key = 2.41942 + built positions[6] for 653242 kmers; bits/key = 13.0005 + lower = 8192; upper = 63221; num_bits_per_pos = 16; num_kmers_in_partition = 493725 + building MPHF with 64 threads and 1 partitions (avg. partition size = 493725)... 
+ built mphs[7] for 493725 kmers; bits/key = 2.42055 + built positions[7] for 493725 kmers; bits/key = 16.0007 +=== step 7.2 (build skew index): 33.9498 [sec] (1.88737 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 92.1878% +buckets with 2 minimizer positions = 4.65692% +buckets with 3 minimizer positions = 1.35588% +buckets with 4 minimizer positions = 0.60708% +buckets with 5 minimizer positions = 0.331836% +buckets with 6 minimizer positions = 0.203718% +buckets with 7 minimizer positions = 0.135291% +buckets with 8 minimizer positions = 0.095017% +buckets with 9 minimizer positions = 0.0696812% +buckets with 10 minimizer positions = 0.0528772% +buckets with 11 minimizer positions = 0.0411865% +buckets with 12 minimizer positions = 0.0328397% +buckets with 13 minimizer positions = 0.0267158% +buckets with 14 minimizer positions = 0.0219975% +buckets with 15 minimizer positions = 0.018403% +buckets with 16 minimizer positions = 0.0155512% +max_bucket_size = 63221 +=== step 7 (build sparse and skew index): 96.1805 [sec] (5.34694 [ns/kmer]) +=== total time: 2086.37 [sec] (115.987 [ns/kmer]) +total index size: 22620878165 [B] -- 22620.9 [MB] +SPACE BREAKDOWN: + mphf: 0.406843 [bits/kmer] (2.8289 [bits/key]) -- 4.04398% + strings_offsets: 0.253608 [bits/kmer] -- 2.52084% + control_codewords: 5.1774 [bits/kmer] -- 51.4629% + mid_load_buckets: 1.37243 [bits/kmer] -- 13.6418% + begin_buckets_of_size: 1.19191e-07 [bits/kmer] -- 1.18475e-06% + strings: 2.62587 [bits/kmer] -- 26.1009% + skew_index: 0.224309 [bits/kmer] -- 2.2296% + weights: 8.18326e-08 [bits/kmer] -- 8.13408e-07% + -------------- + total: 10.0605 [bits/kmer] +2025-11-10 23:28:34: saving data structure to disk... 
+2025-11-10 23:30:22: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash +2025-11-10 23:30:23: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... +read 1000000 sequences, 159860354 bases, 129860354 kmers +read 2000000 sequences, 345519042 bases, 285519042 kmers +read 3000000 sequences, 569210425 bases, 479210425 kmers +read 4000000 sequences, 848332212 bases, 728332212 kmers +read 5000000 sequences, 1226889961 bases, 1076889961 kmers +read 6000000 sequences, 1800462808 bases, 1620462808 kmers +read 7000000 sequences, 1906975392 bases, 1696975392 kmers +read 8000000 sequences, 1964117272 bases, 1724117272 kmers +read 9000000 sequences, 2021346703 bases, 1751346703 kmers +read 10000000 sequences, 2078777105 bases, 1778777105 kmers +read 11000000 sequences, 2136245853 bases, 1806245853 kmers +read 12000000 sequences, 2193864516 bases, 1833864516 kmers +read 13000000 sequences, 2251713140 bases, 1861713140 kmers +read 14000000 sequences, 2309685311 bases, 1889685311 kmers +read 15000000 sequences, 2367830861 bases, 1917830861 kmers +read 16000000 sequences, 2426185107 bases, 1946185107 kmers +read 17000000 sequences, 2484756357 bases, 1974756357 kmers +read 18000000 sequences, 2543560790 bases, 2003560790 kmers +read 19000000 sequences, 2602544828 bases, 2032544828 kmers +read 20000000 sequences, 2661829332 bases, 2061829332 kmers +read 21000000 sequences, 2721408473 bases, 2091408473 kmers +read 22000000 sequences, 2781228842 bases, 2121228842 kmers +read 23000000 sequences, 2841415119 bases, 2151415119 kmers +read 24000000 sequences, 2901936379 bases, 2181936379 kmers +read 25000000 sequences, 2962750749 bases, 2212750749 kmers +read 26000000 sequences, 3023914429 bases, 2243914429 kmers +read 27000000 sequences, 3085556058 bases, 2275556058 kmers +read 28000000 sequences, 
3147523815 bases, 2307523815 kmers +read 29000000 sequences, 3209891758 bases, 2339891758 kmers +read 30000000 sequences, 3272761181 bases, 2372761181 kmers +read 31000000 sequences, 3336150965 bases, 2406150965 kmers +read 32000000 sequences, 3400254734 bases, 2440254734 kmers +read 33000000 sequences, 3464886783 bases, 2474886783 kmers +read 34000000 sequences, 3530247184 bases, 2510247184 kmers +read 35000000 sequences, 3596273843 bases, 2546273843 kmers +read 36000000 sequences, 3663044813 bases, 2583044813 kmers +read 37000000 sequences, 3730743513 bases, 2620743513 kmers +read 38000000 sequences, 3799297920 bases, 2659297920 kmers +read 39000000 sequences, 3869022100 bases, 2699022100 kmers +read 40000000 sequences, 3939899906 bases, 2739899906 kmers +read 41000000 sequences, 4011944353 bases, 2781944353 kmers +read 42000000 sequences, 4085447760 bases, 2825447760 kmers +read 43000000 sequences, 4160667187 bases, 2870667187 kmers +read 44000000 sequences, 4237696486 bases, 2917696486 kmers +read 45000000 sequences, 4316730755 bases, 2966730755 kmers +read 46000000 sequences, 4398064724 bases, 3018064724 kmers +read 47000000 sequences, 4482251464 bases, 3072251464 kmers +read 48000000 sequences, 4569570617 bases, 3129570617 kmers +read 49000000 sequences, 4660631625 bases, 3190631625 kmers +read 50000000 sequences, 4756246344 bases, 3256246344 kmers +read 51000000 sequences, 4856753463 bases, 3326753463 kmers +read 52000000 sequences, 4964398717 bases, 3404398717 kmers +read 53000000 sequences, 5079791551 bases, 3489791551 kmers +read 54000000 sequences, 5205070836 bases, 3585070836 kmers +read 55000000 sequences, 5343495625 bases, 3693495625 kmers +read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +cost: 2.0 + 0.890898 [bits/kmer] +max string length = 17920 +num bits per_absolute_offset = 33 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 17.5857 [sec] (4.72971 [ns/kmer]) +saving to 
file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.18.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.37.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.56.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.75.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.94.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.113.bin'... 
+=== step 2 (compute minimizer tuples): 6.54952 [sec] (1.76151 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 3 (merging minimizer tuples): 57.673 [sec] (15.5113 [ns/kmer]) +num_minimizers = 544808214 +num_minimizer_positions = 661139039 +num_super_kmers = 661139039 +building minimizers MPHF with 64 threads and 182 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 21.1012 [sec] (5.67524 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 22.4717 [sec] (6.04383 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762813823746747470.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +=== step 6 (merging minimizers tuples): 95.4467 [sec] (25.6707 [ns/kmer]) +num_bits_per_offset = 33 +num_buckets_larger_than_1_not_in_skew_index 51666891/544808214 (9.4835%) +num_buckets_in_skew_index 108291/544808214 (0.0198769%) +max_bucket_size 81171 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 149056975/661139039 (22.5455%) +num_minimizer_positions_of_buckets_in_skew_index 19049032/661139039 (2.88124%) +=== step 7.1 (build sparse index): 11.2203 [sec] (3.01773 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 28872109 + partition = 1: num kmers in buckets of 
size > 128 and <= 256: 21161002 + partition = 2: num kmers in buckets of size > 256 and <= 512: 14619521 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 8967010 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 5825185 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3889571 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2412718 + partition = 7: num kmers in buckets of size > 8192 and <= 81171: 2510316 +num kmers in skew index = 88257432 (2.37371%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 28872109 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[0] for 28872109 kmers; bits/key = 2.53953 + built positions[0] for 28872109 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21161002 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[1] for 21161002 kmers; bits/key = 2.665 + built positions[1] for 21161002 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 14619521 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[2] for 14619521 kmers; bits/key = 2.58618 + built positions[2] for 14619521 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 8967010 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 8967010 kmers; bits/key = 2.51981 + built positions[3] for 8967010 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 5825185 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... 
+ built mphs[4] for 5825185 kmers; bits/key = 2.55049 + built positions[4] for 5825185 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3889571 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3889571 kmers; bits/key = 2.9515 + built positions[5] for 3889571 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2412718 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2412718)... + built mphs[6] for 2412718 kmers; bits/key = 2.41709 + built positions[6] for 2412718 kmers; bits/key = 13.0002 + lower = 8192; upper = 81171; num_bits_per_pos = 17; num_kmers_in_partition = 2510316 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2510316)... + built mphs[7] for 2510316 kmers; bits/key = 2.55991 + built positions[7] for 2510316 kmers; bits/key = 17.0001 +=== step 7.2 (build skew index): 15.015 [sec] (4.03833 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 90.4966% +buckets with 2 minimizer positions = 7.33042% +buckets with 3 minimizer positions = 1.04104% +buckets with 4 minimizer positions = 0.358078% +buckets with 5 minimizer positions = 0.190444% +buckets with 6 minimizer positions = 0.119606% +buckets with 7 minimizer positions = 0.0815559% +buckets with 8 minimizer positions = 0.059031% +buckets with 9 minimizer positions = 0.0448457% +buckets with 10 minimizer positions = 0.0349758% +buckets with 11 minimizer positions = 0.0279273% +buckets with 12 minimizer positions = 0.022929% +buckets with 13 minimizer positions = 0.0189582% +buckets with 14 minimizer positions = 0.0159649% +buckets with 15 minimizer positions = 0.0135407% +buckets with 16 minimizer positions = 0.0117506% +max_bucket_size = 81171 +=== step 7 (build sparse and skew index): 27.858 [sec] (7.49249 [ns/kmer]) +=== total time: 248.686 [sec] (66.8848 [ns/kmer]) 
+total index size: 4810783166 [B] -- 4810.78 [MB] +SPACE BREAKDOWN: + mphf: 0.415447 [bits/kmer] (2.83528 [bits/key]) -- 4.01359% + strings_offsets: 0.300083 [bits/kmer] -- 2.89907% + control_codewords: 4.98195 [bits/kmer] -- 48.1301% + mid_load_buckets: 1.32295 [bits/kmer] -- 12.7809% + begin_buckets_of_size: 5.76635e-07 [bits/kmer] -- 5.57082e-06% + strings: 2.8909 [bits/kmer] -- 27.9287% + skew_index: 0.439676 [bits/kmer] -- 4.24766% + weights: 3.95899e-07 [bits/kmer] -- 3.82474e-06% + -------------- + total: 10.351 [bits/kmer] +2025-11-10 23:34:32: saving data structure to disk... +2025-11-10 23:34:55: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash +2025-11-10 23:34:56: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz'... +read 1000000 sequences, 153195620 bases, 123195620 kmers +read 2000000 sequences, 238301856 bases, 178301856 kmers +read 3000000 sequences, 306827724 bases, 216827724 kmers +read 4000000 sequences, 376235441 bases, 256235441 kmers +read 5000000 sequences, 445233170 bases, 295233170 kmers +read 6000000 sequences, 515368260 bases, 335368260 kmers +read 7000000 sequences, 586116050 bases, 376116050 kmers +read 8000000 sequences, 657174193 bases, 417174193 kmers +read 9000000 sequences, 729536721 bases, 459536721 kmers +read 10000000 sequences, 802902838 bases, 502902838 kmers +read 11000000 sequences, 876372447 bases, 546372447 kmers +read 12000000 sequences, 951284053 bases, 591284053 kmers +read 13000000 sequences, 1027636701 bases, 637636701 kmers +read 14000000 sequences, 1105722693 bases, 685722693 kmers +read 15000000 sequences, 1185796892 bases, 735796892 kmers +read 16000000 sequences, 1267846293 bases, 787846293 kmers +read 17000000 sequences, 1352901026 bases, 842901026 kmers +read 18000000 sequences, 1442015880 bases, 902015880 kmers +read 
19000000 sequences, 1536308350 bases, 966308350 kmers +read 20000000 sequences, 1639158516 bases, 1039158516 kmers +read 20822360 sequences, 1735689645 bases, 1111018845 kmers +num_kmers 1111018845 +cost: 2.0 + 1.1245 [bits/kmer] +max string length = 176455 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 25 +=== step 1 (encode strings): 6.08674 [sec] (5.47852 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.14.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.33.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.52.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.64.bin'... +=== step 2 (compute minimizer tuples): 1.23083 [sec] (1.10784 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 10.6473 [sec] (9.58335 [ns/kmer]) +num_minimizers = 153003346 +num_minimizer_positions = 201447538 +num_super_kmers = 201447538 +building minimizers MPHF with 64 threads and 52 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 6.36665 [sec] (5.73047 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814096124463938.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 6.97711 [sec] (6.27992 [ns/kmer]) +=== step 6 (merging minimizers tuples): 29.6076 [sec] (26.6491 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 16447921/153003346 (10.75%) +num_buckets_in_skew_index 10727/153003346 (0.00701096%) +max_bucket_size 71743 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 61815481/201447538 (30.6856%) +num_minimizer_positions_of_buckets_in_skew_index 3087359/201447538 (1.53259%) +=== step 7.1 (build sparse index): 3.92996 [sec] (3.53725 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2530324 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1770374 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1563484 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1214204 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1091924 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1055593 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 789627 + partition = 7: num kmers in buckets of size > 8192 and <= 71743: 1030596 +num kmers in skew index = 11046126 (0.994234%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2530324 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2530324)... + built mphs[0] for 2530324 kmers; bits/key = 2.5599 + built positions[0] for 2530324 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1770374 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1770374)... + built mphs[1] for 1770374 kmers; bits/key = 2.56029 + built positions[1] for 1770374 kmers; bits/key = 8.00019 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1563484 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1563484)... 
+ built mphs[2] for 1563484 kmers; bits/key = 2.56038 + built positions[2] for 1563484 kmers; bits/key = 9.00021 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1214204 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1214204)... + built mphs[3] for 1214204 kmers; bits/key = 2.56081 + built positions[3] for 1214204 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1091924 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1091924)... + built mphs[4] for 1091924 kmers; bits/key = 2.4182 + built positions[4] for 1091924 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1055593 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1055593)... + built mphs[5] for 1055593 kmers; bits/key = 2.56108 + built positions[5] for 1055593 kmers; bits/key = 12.0003 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 789627 + building MPHF with 64 threads and 1 partitions (avg. partition size = 789627)... + built mphs[6] for 789627 kmers; bits/key = 2.41888 + built positions[6] for 789627 kmers; bits/key = 13.0004 + lower = 8192; upper = 71743; num_bits_per_pos = 17; num_kmers_in_partition = 1030596 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1030596)... 
+ built mphs[7] for 1030596 kmers; bits/key = 2.4183 + built positions[7] for 1030596 kmers; bits/key = 17.0004 +=== step 7.2 (build skew index): 6.08117 [sec] (5.4735 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 89.2429% +buckets with 2 minimizer positions = 6.05035% +buckets with 3 minimizer positions = 1.71539% +buckets with 4 minimizer positions = 0.818195% +buckets with 5 minimizer positions = 0.497438% +buckets with 6 minimizer positions = 0.343779% +buckets with 7 minimizer positions = 0.255749% +buckets with 8 minimizer positions = 0.199702% +buckets with 9 minimizer positions = 0.159387% +buckets with 10 minimizer positions = 0.13028% +buckets with 11 minimizer positions = 0.107217% +buckets with 12 minimizer positions = 0.0878229% +buckets with 13 minimizer positions = 0.0717429% +buckets with 14 minimizer positions = 0.0589601% +buckets with 15 minimizer positions = 0.0471898% +buckets with 16 minimizer positions = 0.0377861% +max_bucket_size = 71743 +=== step 7 (build sparse and skew index): 10.5438 [sec] (9.49019 [ns/kmer]) +=== total time: 71.46 [sec] (64.3194 [ns/kmer]) +total index size: 1416509371 [B] -- 1416.51 [MB] +SPACE BREAKDOWN: + mphf: 0.394931 [bits/kmer] (2.86775 [bits/key]) -- 3.87198% + strings_offsets: 0.337884 [bits/kmer] -- 3.31268% + control_codewords: 4.40686 [bits/kmer] -- 43.2057% + mid_load_buckets: 1.7248 [bits/kmer] -- 16.9102% + begin_buckets_of_size: 1.92976e-06 [bits/kmer] -- 1.89197e-05% + strings: 3.1245 [bits/kmer] -- 30.6332% + skew_index: 0.210737 [bits/kmer] -- 2.0661% + weights: 1.32491e-06 [bits/kmer] -- 1.29897e-05% + -------------- + total: 10.1997 [bits/kmer] +2025-11-10 23:36:07: saving data structure to disk... +2025-11-10 23:36:13: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash +2025-11-10 23:36:13: building data structure... 
+reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... +read 1000000 sequences, 89525906 bases, 59525906 kmers +read 2000000 sequences, 188052064 bases, 128052064 kmers +read 3000000 sequences, 302142183 bases, 212142183 kmers +read 4000000 sequences, 461236524 bases, 341236524 kmers +read 5000000 sequences, 530371783 bases, 380371783 kmers +read 6000000 sequences, 600036489 bases, 420036489 kmers +read 7000000 sequences, 670072473 bases, 460072473 kmers +read 8000000 sequences, 740830673 bases, 500830673 kmers +read 9000000 sequences, 812530455 bases, 542530455 kmers +read 10000000 sequences, 884692153 bases, 584692153 kmers +read 11000000 sequences, 958222271 bases, 628222271 kmers +read 12000000 sequences, 1032736062 bases, 672736062 kmers +read 13000000 sequences, 1108501169 bases, 718501169 kmers +read 14000000 sequences, 1186158510 bases, 766158510 kmers +read 15000000 sequences, 1266102895 bases, 816102895 kmers +read 16000000 sequences, 1349263765 bases, 869263765 kmers +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +cost: 2.0 + 1.10303 [bits/kmer] +max string length = 117016 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 17 +num bits per_string_id = 24 +=== step 1 (encode strings): 5.39372 [sec] (6.03115 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.5.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.24.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.43.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.62.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.64.bin'... +=== step 2 (compute minimizer tuples): 1.02195 [sec] (1.14272 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 8.54153 [sec] (9.55097 [ns/kmer]) +num_minimizers = 126246665 +num_minimizer_positions = 162006751 +num_super_kmers = 162006751 +building minimizers MPHF with 64 threads and 43 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 5.25778 [sec] (5.87915 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814173844318210.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 5.62714 [sec] (6.29216 [ns/kmer]) +=== step 6 (merging minimizers tuples): 12.2754 [sec] (13.7261 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 14059268/126246665 (11.1363%) +num_buckets_in_skew_index 8266/126246665 (0.0065475%) +max_bucket_size 36894 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 48164669/162006751 (29.73%) +num_minimizer_positions_of_buckets_in_skew_index 1662951/162006751 (1.02647%) +=== step 7.1 (build sparse index): 3.11768 [sec] (3.48613 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 2254325 + partition = 1: num kmers in buckets of size > 128 and <= 256: 1183762 + partition = 2: num kmers in buckets of size > 256 and <= 512: 885561 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 591648 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 450833 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 373731 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 338406 + partition = 7: num kmers 
in buckets of size > 8192 and <= 36894: 388502 +num kmers in skew index = 6466768 (0.723101%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2254325 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2254325)... + built mphs[0] for 2254325 kmers; bits/key = 2.56001 + built positions[0] for 2254325 kmers; bits/key = 7.00015 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1183762 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1183762)... + built mphs[1] for 1183762 kmers; bits/key = 2.56081 + built positions[1] for 1183762 kmers; bits/key = 8.00031 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 885561 + building MPHF with 64 threads and 1 partitions (avg. partition size = 885561)... + built mphs[2] for 885561 kmers; bits/key = 2.56147 + built positions[2] for 885561 kmers; bits/key = 9.00043 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 591648 + building MPHF with 64 threads and 1 partitions (avg. partition size = 591648)... + built mphs[3] for 591648 kmers; bits/key = 2.56263 + built positions[3] for 591648 kmers; bits/key = 10.0005 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 450833 + building MPHF with 64 threads and 1 partitions (avg. partition size = 450833)... + built mphs[4] for 450833 kmers; bits/key = 2.42098 + built positions[4] for 450833 kmers; bits/key = 11.0007 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 373731 + building MPHF with 64 threads and 1 partitions (avg. partition size = 373731)... + built mphs[5] for 373731 kmers; bits/key = 2.56484 + built positions[5] for 373731 kmers; bits/key = 12.0009 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 338406 + building MPHF with 64 threads and 1 partitions (avg. partition size = 338406)... 
+ built mphs[6] for 338406 kmers; bits/key = 2.4226 + built positions[6] for 338406 kmers; bits/key = 13.001 + lower = 8192; upper = 36894; num_bits_per_pos = 16; num_kmers_in_partition = 388502 + building MPHF with 64 threads and 1 partitions (avg. partition size = 388502)... + built mphs[7] for 388502 kmers; bits/key = 2.56472 + built positions[7] for 388502 kmers; bits/key = 16.0009 +=== step 7.2 (build skew index): 3.54555 [sec] (3.96456 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 88.8571% +buckets with 2 minimizer positions = 6.64024% +buckets with 3 minimizer positions = 1.83049% +buckets with 4 minimizer positions = 0.817717% +buckets with 5 minimizer positions = 0.475524% +buckets with 6 minimizer positions = 0.319677% +buckets with 7 minimizer positions = 0.233881% +buckets with 8 minimizer positions = 0.178737% +buckets with 9 minimizer positions = 0.138965% +buckets with 10 minimizer positions = 0.107245% +buckets with 11 minimizer positions = 0.0823246% +buckets with 12 minimizer positions = 0.0632579% +buckets with 13 minimizer positions = 0.0483173% +buckets with 14 minimizer positions = 0.0365673% +buckets with 15 minimizer positions = 0.027916% +buckets with 16 minimizer positions = 0.0218604% +max_bucket_size = 36894 +=== step 7 (build sparse and skew index): 7.08881 [sec] (7.92656 [ns/kmer]) +=== total time: 45.2063 [sec] (50.5488 [ns/kmer]) +total index size: 1137030140 [B] -- 1137.03 [MB] +SPACE BREAKDOWN: + mphf: 0.405702 [bits/kmer] (2.87392 [bits/key]) -- 3.98871% + strings_offsets: 0.333373 [bits/kmer] -- 3.2776% + control_codewords: 4.51733 [bits/kmer] -- 44.4128% + mid_load_buckets: 1.66956 [bits/kmer] -- 16.4145% + begin_buckets_of_size: 2.39738e-06 [bits/kmer] -- 2.35702e-05% + strings: 3.10303 [bits/kmer] -- 30.5079% + skew_index: 0.142237 [bits/kmer] -- 1.39842% + weights: 1.64596e-06 [bits/kmer] -- 1.61825e-05% + -------------- + total: 10.1712 [bits/kmer] +2025-11-10 23:36:59: saving data 
structure to disk... +2025-11-10 23:37:04: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash +2025-11-10 23:37:04: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +read 1000000 sequences, 289026795 bases, 259026795 kmers +read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +cost: 2.0 + 0.262431 [bits/kmer] +max string length = 234900 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 18 +num bits per_string_id = 21 +=== step 1 (encode strings): 1.17629 [sec] (3.12673 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.487854 [sec] (1.29678 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.bin' +=== step 3 (merging minimizer tuples): 2.74645 [sec] (7.3004 [ns/kmer]) +num_minimizers = 52162715 +num_minimizer_positions = 55045821 +num_super_kmers = 55045821 +building minimizers MPHF with 64 threads and 18 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 2.35264 [sec] (6.25361 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814224403175312.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 2.00997 [sec] (5.34276 [ns/kmer]) +=== step 6 (merging minimizers tuples): 3.77259 [sec] (10.028 [ns/kmer]) +num_bits_per_offset = 29 +num_buckets_larger_than_1_not_in_skew_index 2257124/52162715 (4.32708%) +num_buckets_in_skew_index 22/52162715 (4.21757e-05%) +max_bucket_size 322 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 5137481/55045821 (9.3331%) +num_minimizer_positions_of_buckets_in_skew_index 2771/55045821 (0.00503399%) +=== step 7.1 (build sparse index): 0.64619 [sec] (1.71765 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 7461 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2349 + partition = 2: num kmers in buckets of size > 256 and <= 322: 2299 +num kmers in skew index = 12109 (0.00321872%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 7461 + building MPHF with 64 threads and 1 partitions (avg. partition size = 7461)... + built mphs[0] for 7461 kmers; bits/key = 2.69991 + built positions[0] for 7461 kmers; bits/key = 7.05107 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2349 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2349)... + built mphs[1] for 2349 kmers; bits/key = 3.16731 + built positions[1] for 2349 kmers; bits/key = 8.14645 + lower = 256; upper = 322; num_bits_per_pos = 9; num_kmers_in_partition = 2299 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2299)... 
+ built mphs[2] for 2299 kmers; bits/key = 3.18051 + built positions[2] for 2299 kmers; bits/key = 9.15876 +=== step 7.2 (build skew index): 0.027902 [sec] (0.074167 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.6729% +buckets with 2 minimizer positions = 3.54418% +buckets with 3 minimizer positions = 0.560318% +buckets with 4 minimizer positions = 0.1372% +buckets with 5 minimizer positions = 0.0464911% +buckets with 6 minimizer positions = 0.0185669% +buckets with 7 minimizer positions = 0.00842556% +buckets with 8 minimizer positions = 0.00444379% +buckets with 9 minimizer positions = 0.00242894% +buckets with 10 minimizer positions = 0.00141289% +buckets with 11 minimizer positions = 0.000904861% +buckets with 12 minimizer positions = 0.000592377% +buckets with 13 minimizer positions = 0.000416006% +buckets with 14 minimizer positions = 0.000352742% +buckets with 15 minimizer positions = 0.000222381% +buckets with 16 minimizer positions = 0.000207044% +max_bucket_size = 322 +=== step 7 (build sparse and skew index): 0.806121 [sec] (2.14277 [ns/kmer]) +=== total time: 13.3519 [sec] (35.4911 [ns/kmer]) +total index size: 346391727 [B] -- 346.392 [MB] +SPACE BREAKDOWN: + mphf: 0.403884 [bits/kmer] (2.91287 [bits/key]) -- 5.48307% + strings_offsets: 0.14346 [bits/kmer] -- 1.9476% + control_codewords: 4.15965 [bits/kmer] -- 56.4708% + mid_load_buckets: 0.396027 [bits/kmer] -- 5.3764% + begin_buckets_of_size: 5.69902e-06 [bits/kmer] -- 7.73691e-05% + strings: 2.26243 [bits/kmer] -- 30.7144% + skew_index: 0.000554293 [bits/kmer] -- 0.00752501% + weights: 3.91276e-06 [bits/kmer] -- 5.31191e-05% + -------------- + total: 7.36602 [bits/kmer] +2025-11-10 23:37:17: saving data structure to disk... 
+2025-11-10 23:37:19: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash +2025-11-10 23:37:19: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz'... +read 1000000 sequences, 686017631 bases, 656017631 kmers +read 2000000 sequences, 1516930736 bases, 1456930736 kmers +read 3000000 sequences, 1671624364 bases, 1581624364 kmers +read 4000000 sequences, 1825353707 bases, 1705353707 kmers +read 5000000 sequences, 1981022079 bases, 1831022079 kmers +read 6000000 sequences, 2135517340 bases, 1955517340 kmers +read 7000000 sequences, 2290855053 bases, 2080855053 kmers +read 8000000 sequences, 2446475973 bases, 2206475973 kmers +read 9000000 sequences, 2602386528 bases, 2332386528 kmers +read 10000000 sequences, 2759608735 bases, 2459608735 kmers +read 11000000 sequences, 2916635557 bases, 2586635557 kmers +read 12000000 sequences, 3075074548 bases, 2715074548 kmers +read 13000000 sequences, 3231174451 bases, 2841174451 kmers +read 14000000 sequences, 3387702724 bases, 2967702724 kmers +read 15000000 sequences, 3546041675 bases, 3096041675 kmers +read 16000000 sequences, 3705485347 bases, 3225485347 kmers +read 17000000 sequences, 3864338567 bases, 3354338567 kmers +read 18000000 sequences, 4024214257 bases, 3484214257 kmers +read 19000000 sequences, 4185860468 bases, 3615860468 kmers +read 20000000 sequences, 4346092632 bases, 3746092632 kmers +read 21000000 sequences, 4506820088 bases, 3876820088 kmers +read 22000000 sequences, 4669576501 bases, 4009576501 kmers +read 23000000 sequences, 4833065439 bases, 4143065439 kmers +read 24000000 sequences, 4995788985 bases, 4275788985 kmers +read 25000000 sequences, 5161157096 bases, 4411157096 kmers +read 26000000 sequences, 5326624598 bases, 4546624598 kmers +read 27000000 sequences, 5491635755 
bases, 4681635755 kmers +read 28000000 sequences, 5657793355 bases, 4817793355 kmers +read 29000000 sequences, 5825695255 bases, 4955695255 kmers +read 30000000 sequences, 5993510862 bases, 5093510862 kmers +read 31000000 sequences, 6162441208 bases, 5232441208 kmers +read 32000000 sequences, 6331692362 bases, 5371692362 kmers +read 33000000 sequences, 6503271864 bases, 5513271864 kmers +read 34000000 sequences, 6675760229 bases, 5655760229 kmers +read 35000000 sequences, 6848330384 bases, 5798330384 kmers +read 36000000 sequences, 7023123965 bases, 5943123965 kmers +read 37000000 sequences, 7198253074 bases, 6088253074 kmers +read 38000000 sequences, 7375809245 bases, 6235809245 kmers +read 39000000 sequences, 7554546146 bases, 6384546146 kmers +read 40000000 sequences, 7733588270 bases, 6533588270 kmers +read 41000000 sequences, 7913812723 bases, 6683812723 kmers +read 42000000 sequences, 8096249793 bases, 6836249793 kmers +read 43000000 sequences, 8280221420 bases, 6990221420 kmers +read 44000000 sequences, 8465351199 bases, 7145351199 kmers +read 45000000 sequences, 8653130199 bases, 7303130199 kmers +read 46000000 sequences, 8842916979 bases, 7462916979 kmers +read 47000000 sequences, 9034171590 bases, 7624171590 kmers +read 48000000 sequences, 9229077420 bases, 7789077420 kmers +read 49000000 sequences, 9427173385 bases, 7957173385 kmers +read 50000000 sequences, 9626599822 bases, 8126599822 kmers +read 51000000 sequences, 9828281066 bases, 8298281066 kmers +read 52000000 sequences, 10034632099 bases, 8474632099 kmers +read 53000000 sequences, 10244441062 bases, 8654441062 kmers +read 54000000 sequences, 10461638729 bases, 8841638729 kmers +read 55000000 sequences, 10681775593 bases, 9031775593 kmers +read 56000000 sequences, 10909062511 bases, 9229062511 kmers +read 57000000 sequences, 11141765143 bases, 9431765143 kmers +read 58000000 sequences, 11382466536 bases, 9642466536 kmers +read 59000000 sequences, 11631643814 bases, 9861643814 kmers +read 60000000 
sequences, 11893103257 bases, 10093103257 kmers +read 61000000 sequences, 12168237378 bases, 10338237378 kmers +read 62000000 sequences, 12459768025 bases, 10599768025 kmers +read 63000000 sequences, 12773380141 bases, 10883380141 kmers +read 64000000 sequences, 13118311195 bases, 11198311195 kmers +read 65000000 sequences, 13506575783 bases, 11556575783 kmers +read 66000000 sequences, 13957265370 bases, 11977265370 kmers +read 66677672 sequences, 14320170624 bases, 12319840464 kmers +num_kmers 12319840464 +cost: 2.0 + 0.324733 [bits/kmer] +max string length = 199388 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 18 +num bits per_string_id = 26 +=== step 1 (encode strings): 40.468 [sec] (3.28478 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.68.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.87.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.106.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.125.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.143.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.144.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.162.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.163.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.166.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.181.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.182.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.185.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.200.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.201.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.204.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.206.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.207.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.208.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.209.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.210.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.211.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.212.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.213.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.214.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.215.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.216.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.217.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.218.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.219.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.220.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.221.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.222.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.223.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.224.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.225.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.226.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.227.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.228.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.229.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.230.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.231.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.232.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.233.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.234.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.235.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.236.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.237.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.238.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.239.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.240.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.241.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.242.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.243.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.244.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.245.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.246.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.247.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.248.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.249.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.250.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.251.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.252.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.253.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.254.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.255.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.256.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.257.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.258.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.259.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.260.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.261.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.262.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.263.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.264.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.265.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.266.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.267.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.268.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.269.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.270.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.271.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.272.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.273.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.274.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.275.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.276.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.277.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.278.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.279.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.280.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.281.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.282.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.283.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.284.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.285.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.286.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.287.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.288.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.289.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.290.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.291.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.292.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.293.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.294.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.295.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.296.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.297.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.298.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.299.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.300.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.301.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.302.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.303.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.304.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.305.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.306.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.307.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.308.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.309.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.310.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.311.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.312.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.313.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.314.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.315.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.316.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.317.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.318.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.319.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.320.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.321.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.322.bin'... +=== step 2 (compute minimizer tuples): 13.5477 [sec] (1.09966 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +=== step 3 (merging minimizer tuples): 157.577 [sec] (12.7905 [ns/kmer]) +num_minimizers = 1961525096 +num_minimizer_positions = 2099034004 +num_super_kmers = 2099034004 +building minimizers MPHF with 64 threads and 654 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 172.892 [sec] (14.0336 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.4.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 150.31 [sec] (12.2007 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762814239345305644.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +processed 1600000000 minimizer tuples +processed 1700000000 minimizer tuples +processed 1800000000 minimizer tuples +processed 1900000000 minimizer tuples +processed 2000000000 minimizer tuples +=== step 6 (merging minimizers tuples): 239.907 [sec] (19.4732 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 91088139/1961525096 (4.64374%) +num_buckets_in_skew_index 15359/1961525096 (0.000783013%) +max_bucket_size 29356 +log2_max_bucket_size 15 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 225830591/2099034004 (10.7588%) +num_minimizer_positions_of_buckets_in_skew_index 2781815/2099034004 
(0.132528%) +=== step 7.1 (build sparse index): 26.8345 [sec] (2.17816 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4802542 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3332028 + partition = 2: num kmers in buckets of size > 256 and <= 512: 2066925 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1692798 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1087998 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 606642 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 618629 + partition = 7: num kmers in buckets of size > 8192 and <= 29356: 453529 +num kmers in skew index = 14661091 (0.119004%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4802542 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4802542 kmers; bits/key = 2.73723 + built positions[0] for 4802542 kmers; bits/key = 7.00008 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3332028 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3332028 kmers; bits/key = 3.24707 + built positions[1] for 3332028 kmers; bits/key = 8.00011 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 2066925 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2066925)... + built mphs[2] for 2066925 kmers; bits/key = 2.56012 + built positions[2] for 2066925 kmers; bits/key = 9.00018 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1692798 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1692798)... + built mphs[3] for 1692798 kmers; bits/key = 2.56031 + built positions[3] for 1692798 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1087998 + building MPHF with 64 threads and 1 partitions (avg. 
partition size = 1087998)... + built mphs[4] for 1087998 kmers; bits/key = 2.41823 + built positions[4] for 1087998 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 606642 + building MPHF with 64 threads and 1 partitions (avg. partition size = 606642)... + built mphs[5] for 606642 kmers; bits/key = 2.41969 + built positions[5] for 606642 kmers; bits/key = 12.0006 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 618629 + building MPHF with 64 threads and 1 partitions (avg. partition size = 618629)... + built mphs[6] for 618629 kmers; bits/key = 2.56252 + built positions[6] for 618629 kmers; bits/key = 13.0006 + lower = 8192; upper = 29356; num_bits_per_pos = 15; num_kmers_in_partition = 453529 + building MPHF with 64 threads and 1 partitions (avg. partition size = 453529)... + built mphs[7] for 453529 kmers; bits/key = 2.42087 + built positions[7] for 453529 kmers; bits/key = 15.0007 +=== step 7.2 (build skew index): 4.79322 [sec] (0.389065 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.3555% +buckets with 2 minimizer positions = 3.59761% +buckets with 3 minimizer positions = 0.65191% +buckets with 4 minimizer positions = 0.191677% +buckets with 5 minimizer positions = 0.0777054% +buckets with 6 minimizer positions = 0.0393496% +buckets with 7 minimizer positions = 0.0230028% +buckets with 8 minimizer positions = 0.0148099% +buckets with 9 minimizer positions = 0.0102043% +buckets with 10 minimizer positions = 0.00731242% +buckets with 11 minimizer positions = 0.00546238% +buckets with 12 minimizer positions = 0.00416008% +buckets with 13 minimizer positions = 0.00324798% +buckets with 14 minimizer positions = 0.0025861% +buckets with 15 minimizer positions = 0.0020871% +buckets with 16 minimizer positions = 0.00169791% +max_bucket_size = 29356 +=== step 7 (build sparse and skew index): 36.1232 [sec] (2.93212 [ns/kmer]) +=== total time: 810.824 
[sec] (65.8145 [ns/kmer]) +total index size: 14122834359 [B] -- 14122.8 [MB] +SPACE BREAKDOWN: + mphf: 0.449842 [bits/kmer] (2.82534 [bits/key]) -- 4.90516% + strings_offsets: 0.178888 [bits/kmer] -- 1.95063% + control_codewords: 5.57259 [bits/kmer] -- 60.7645% + mid_load_buckets: 0.623242 [bits/kmer] -- 6.79595% + begin_buckets_of_size: 1.74028e-07 [bits/kmer] -- 1.89764e-06% + strings: 2.32473 [bits/kmer] -- 25.3493% + skew_index: 0.0214982 [bits/kmer] -- 0.234421% + weights: 1.19482e-07 [bits/kmer] -- 1.30285e-06% + -------------- + total: 9.17079 [bits/kmer] +2025-11-10 23:50:50: saving data structure to disk... +2025-11-10 23:51:56: DONE diff --git a/benchmarks/results-10-11-25/k31/regular-build.time.log b/benchmarks/results-10-11-25/k31/regular-build.time.log new file mode 100644 index 0000000..c483c29 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-build.time.log @@ -0,0 +1,207 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash" + User time (seconds): 42.15 + System time (seconds): 10.57 + Percent of CPU this job got: 185% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:28.37 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4078416 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2660433 + Voluntary context switches: 29457 + Involuntary context switches: 7941 + Swaps: 0 + File system inputs: 0 + File system outputs: 11811032 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -g 16 -t 64 --verbose -d 
/mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash" + User time (seconds): 111.91 + System time (seconds): 26.47 + Percent of CPU this job got: 289% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:47.78 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9624656 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6078020 + Voluntary context switches: 33158 + Involuntary context switches: 9143 + Swaps: 0 + File system inputs: 128 + File system outputs: 26795048 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash" + User time (seconds): 309.05 + System time (seconds): 56.64 + Percent of CPU this job got: 272% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:14.32 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 18086172 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 13788606 + Voluntary context switches: 69187 + Involuntary context switches: 10303 + Swaps: 0 + File system inputs: 176 + File system outputs: 63600976 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o 
/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k31.sshash" + User time (seconds): 2303.16 + System time (seconds): 492.15 + Percent of CPU this job got: 127% + Elapsed (wall clock) time (h:mm:ss or m:ss): 36:35.21 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 94469984 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 107232 + Minor (reclaiming a frame) page faults: 70110189 + Voluntary context switches: 1092211 + Involuntary context switches: 73349 + Swaps: 0 + File system inputs: 45823408 + File system outputs: 576910912 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash" + User time (seconds): 472.79 + System time (seconds): 89.24 + Percent of CPU this job got: 206% + Elapsed (wall clock) time (h:mm:ss or m:ss): 4:32.37 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 23454808 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 1 + Minor (reclaiming a frame) page faults: 20354525 + Voluntary context switches: 138546 + Involuntary context switches: 15103 + Swaps: 0 + File system inputs: 3671248 + File system outputs: 121645608 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k31.sshash" + User time 
(seconds): 121.63 + System time (seconds): 28.91 + Percent of CPU this job got: 193% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:17.71 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9668960 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6817679 + Voluntary context switches: 31022 + Involuntary context switches: 9096 + Swaps: 0 + File system inputs: 1247824 + File system outputs: 29435176 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash" + User time (seconds): 92.24 + System time (seconds): 22.31 + Percent of CPU this job got: 226% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:50.55 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7828188 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5524160 + Voluntary context switches: 31543 + Involuntary context switches: 9210 + Swaps: 0 + File system inputs: 996952 + File system outputs: 23782336 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash" + User time (seconds): 26.33 + System time (seconds): 7.84 + Percent of CPU this job got: 
228% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:14.93 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 2983092 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1880126 + Voluntary context switches: 29352 + Involuntary context switches: 7007 + Swaps: 0 + File system inputs: 265464 + File system outputs: 8333480 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k31.eulertigs.fa.gz -k 31 -m 21 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k31.sshash" + User time (seconds): 1593.27 + System time (seconds): 266.26 + Percent of CPU this job got: 211% + Elapsed (wall clock) time (h:mm:ss or m:ss): 14:38.08 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 59072844 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 46119150 + Voluntary context switches: 651688 + Involuntary context switches: 45324 + Swaps: 0 + File system inputs: 9165920 + File system outputs: 392165928 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.json b/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.json new file mode 100644 index 0000000..d2d7949 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.json @@ -0,0 +1,6 @@ +{"index_filename": 
"/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "163287360", "num_positive_kmers": "132860997", "num_negative_kmers": "30426363", "num_invalid_kmers": "0", "num_searches": "6576340", "num_extensions": "126284657", "elapsed_millisec": "5660"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "695737535", "num_positive_kmers": "525542891", "num_negative_kmers": "170183654", "num_invalid_kmers": "10990", "num_searches": "12437476", "num_extensions": "513105415", "elapsed_millisec": "44876"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1437949378", "num_negative_kmers": "130996597", "num_invalid_kmers": "1029011", "num_searches": "100222623", "num_extensions": "1337726755", "elapsed_millisec": "177288"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz", "num_kmers": "14092875", "num_positive_kmers": "13983775", "num_negative_kmers": "108161", "num_invalid_kmers": "939", "num_searches": "590894", "num_extensions": "13392881", "elapsed_millisec": "441"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz", "num_kmers": "789838196", "num_positive_kmers": "764882549", "num_negative_kmers": "24935381", "num_invalid_kmers": "20266", "num_searches": "218875709", "num_extensions": "546006840", "elapsed_millisec": "202642"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "1569974986", "num_positive_kmers": "1485223278", "num_negative_kmers": 
"83722697", "num_invalid_kmers": "1029011", "num_searches": "135823240", "num_extensions": "1349400038", "elapsed_millisec": "231629"} diff --git a/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.log b/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.log new file mode 100644 index 0000000..00ceb86 --- /dev/null +++ b/benchmarks/results-10-11-25/k31/regular-streaming-queries-high-hit.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2025-11-11 20:49:42: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-11 20:49:48: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 132860997 (81.3664%) +num_negative_kmers = 30426363 (18.6336%) +num_invalid_kmers = 0 (0%) +num_searches = 6576340/132860997 (4.94979%) +num_extensions = 126284657/132860997 (95.0502%) +elapsed = 5.66 sec / 0.0943333 min / 34.6628 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2025-11-11 20:49:48: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-11 20:50:33: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 525542891 (75.5375%) +num_negative_kmers = 170183654 (24.4609%) +num_invalid_kmers = 10990 (0.00157962%) +num_searches = 12437476/525542891 (2.3666%) +num_extensions = 513105415/525542891 (97.6334%) +elapsed = 44.876 sec / 0.747933 min / 64.5013 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 20:50:34: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-11 20:53:32: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1437949378 (91.5906%) +num_negative_kmers = 130996597 (8.34387%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 100222623/1437949378 (6.96983%) +num_extensions = 1337726755/1437949378 (93.0302%) +elapsed = 177.288 sec / 2.9548 min / 112.924 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz +2025-11-11 20:53:32: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... +2025-11-11 20:53:32: DONE +==== query report: +num_kmers = 14092875 +num_positive_kmers = 13983775 (99.2258%) +num_negative_kmers = 108161 (0.767487%) +num_invalid_kmers = 939 (0.00666294%) +num_searches = 590894/13983775 (4.22557%) +num_extensions = 13392881/13983775 (95.7744%) +elapsed = 0.441 sec / 0.00735 min / 31.2924 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz +2025-11-11 20:53:33: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-11 20:56:55: DONE +==== query report: +num_kmers = 789838196 +num_positive_kmers = 764882549 (96.8404%) +num_negative_kmers = 24935381 (3.15702%) +num_invalid_kmers = 20266 (0.00256584%) +num_searches = 218875709/764882549 (28.6156%) +num_extensions = 546006840/764882549 (71.3844%) +elapsed = 202.642 sec / 3.37737 min / 256.561 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k31.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 20:56:57: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-11 21:00:49: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1485223278 (94.6017%) +num_negative_kmers = 83722697 (5.33274%) +num_invalid_kmers = 1029011 (0.0655431%) +num_searches = 135823240/1485223278 (9.14497%) +num_extensions = 1349400038/1485223278 (90.855%) +elapsed = 231.629 sec / 3.86048 min / 147.537 ns/kmer diff --git a/benchmarks/results-10-11-25/k63/canon-bench.json b/benchmarks/results-10-11-25/k63/canon-bench.json new file mode 100644 index 0000000..9250c79 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-bench.json @@ -0,0 +1,27 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "k": "63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "557.269099", "negative lookup (avg_nanosec_per_kmer)": "442.286796", "access (avg_nanosec_per_kmer)": "291.709738", "iterator (avg_nanosec_per_kmer)": "2.920477"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "k": "63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "562.071228", "negative lookup (avg_nanosec_per_kmer)": "443.072877", "access (avg_nanosec_per_kmer)": "291.651493", "iterator (avg_nanosec_per_kmer)": "2.912875"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "k": "63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "569.896698", "negative lookup (avg_nanosec_per_kmer)": "450.983682", "access (avg_nanosec_per_kmer)": "291.025846", "iterator (avg_nanosec_per_kmer)": "2.913137"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "k": "63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "534.079325", "negative lookup (avg_nanosec_per_kmer)": "476.376665", "access (avg_nanosec_per_kmer)": "328.843534", "iterator (avg_nanosec_per_kmer)": "2.963414"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "k": 
"63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "536.497629", "negative lookup (avg_nanosec_per_kmer)": "482.526724", "access (avg_nanosec_per_kmer)": "325.917173", "iterator (avg_nanosec_per_kmer)": "2.902107"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "k": "63", "m": "24", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "537.505202", "negative lookup (avg_nanosec_per_kmer)": "475.474755", "access (avg_nanosec_per_kmer)": "330.187994", "iterator (avg_nanosec_per_kmer)": "2.902902"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "684.323283", "negative lookup (avg_nanosec_per_kmer)": "510.931251", "access (avg_nanosec_per_kmer)": "362.611814", "iterator (avg_nanosec_per_kmer)": "2.943467"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "681.865558", "negative lookup (avg_nanosec_per_kmer)": "514.816640", "access (avg_nanosec_per_kmer)": "358.338285", "iterator (avg_nanosec_per_kmer)": "2.913519"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "690.784094", "negative lookup (avg_nanosec_per_kmer)": "519.457408", "access (avg_nanosec_per_kmer)": "357.355389", "iterator (avg_nanosec_per_kmer)": "2.901267"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1252.976566", "negative lookup (avg_nanosec_per_kmer)": "682.253411", "access (avg_nanosec_per_kmer)": "762.472543", "iterator (avg_nanosec_per_kmer)": "2.926400"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash", "k": "63", "m": 
"25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1248.655529", "negative lookup (avg_nanosec_per_kmer)": "670.957837", "access (avg_nanosec_per_kmer)": "748.885651", "iterator (avg_nanosec_per_kmer)": "3.011971"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1253.046049", "negative lookup (avg_nanosec_per_kmer)": "671.440149", "access (avg_nanosec_per_kmer)": "768.426437", "iterator (avg_nanosec_per_kmer)": "2.947120"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1101.847356", "negative lookup (avg_nanosec_per_kmer)": "576.618390", "access (avg_nanosec_per_kmer)": "636.389182", "iterator (avg_nanosec_per_kmer)": "3.072998"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1109.024011", "negative lookup (avg_nanosec_per_kmer)": "591.608500", "access (avg_nanosec_per_kmer)": "639.914436", "iterator (avg_nanosec_per_kmer)": "3.029005"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1122.059020", "negative lookup (avg_nanosec_per_kmer)": "576.365005", "access (avg_nanosec_per_kmer)": "639.753773", "iterator (avg_nanosec_per_kmer)": "3.242957"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1360.425486", "negative lookup (avg_nanosec_per_kmer)": "509.213953", "access (avg_nanosec_per_kmer)": "460.379995", "iterator (avg_nanosec_per_kmer)": "3.003176"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash", "k": "63", "m": "31", "canonical": 
"true", "positive lookup (avg_nanosec_per_kmer)": "1367.503204", "negative lookup (avg_nanosec_per_kmer)": "508.936788", "access (avg_nanosec_per_kmer)": "463.753427", "iterator (avg_nanosec_per_kmer)": "3.010433"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1375.681829", "negative lookup (avg_nanosec_per_kmer)": "512.584564", "access (avg_nanosec_per_kmer)": "460.318823", "iterator (avg_nanosec_per_kmer)": "3.006127"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1286.918580", "negative lookup (avg_nanosec_per_kmer)": "505.491398", "access (avg_nanosec_per_kmer)": "411.809111", "iterator (avg_nanosec_per_kmer)": "2.947131"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1278.123041", "negative lookup (avg_nanosec_per_kmer)": "505.843036", "access (avg_nanosec_per_kmer)": "416.596759", "iterator (avg_nanosec_per_kmer)": "2.975711"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "k": "63", "m": "31", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "1272.423829", "negative lookup (avg_nanosec_per_kmer)": "503.753170", "access (avg_nanosec_per_kmer)": "408.845076", "iterator (avg_nanosec_per_kmer)": "2.959579"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "518.747259", "negative lookup (avg_nanosec_per_kmer)": "437.585843", "access (avg_nanosec_per_kmer)": "283.488334", "iterator (avg_nanosec_per_kmer)": "2.923136"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "k": "63", "m": "23", "canonical": "true", 
"positive lookup (avg_nanosec_per_kmer)": "530.427021", "negative lookup (avg_nanosec_per_kmer)": "441.286428", "access (avg_nanosec_per_kmer)": "282.772049", "iterator (avg_nanosec_per_kmer)": "2.913750"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "k": "63", "m": "23", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "518.760590", "negative lookup (avg_nanosec_per_kmer)": "437.309395", "access (avg_nanosec_per_kmer)": "279.721828", "iterator (avg_nanosec_per_kmer)": "2.918568"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "880.286325", "negative lookup (avg_nanosec_per_kmer)": "616.673906", "access (avg_nanosec_per_kmer)": "583.735908", "iterator (avg_nanosec_per_kmer)": "2.925736"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "879.150094", "negative lookup (avg_nanosec_per_kmer)": "612.251125", "access (avg_nanosec_per_kmer)": "586.975133", "iterator (avg_nanosec_per_kmer)": "2.953926"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash", "k": "63", "m": "25", "canonical": "true", "positive lookup (avg_nanosec_per_kmer)": "883.840406", "negative lookup (avg_nanosec_per_kmer)": "621.500259", "access (avg_nanosec_per_kmer)": "586.320948", "iterator (avg_nanosec_per_kmer)": "2.927535"} diff --git a/benchmarks/results-10-11-25/k63/canon-bench.log b/benchmarks/results-10-11-25/k63/canon-bench.log new file mode 100644 index 0000000..b11cfe4 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-bench.log @@ -0,0 +1,135 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 557.269 +negative lookup (avg_nanosec_per_kmer) 442.287 +access 
(avg_nanosec_per_kmer) = 291.71 +iterator (avg_nanosec_per_kmer) = 2.92048 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 562.071 +negative lookup (avg_nanosec_per_kmer) 443.073 +access (avg_nanosec_per_kmer) = 291.651 +iterator (avg_nanosec_per_kmer) = 2.91287 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 569.897 +negative lookup (avg_nanosec_per_kmer) 450.984 +access (avg_nanosec_per_kmer) = 291.026 +iterator (avg_nanosec_per_kmer) = 2.91314 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 534.079 +negative lookup (avg_nanosec_per_kmer) 476.377 +access (avg_nanosec_per_kmer) = 328.844 +iterator (avg_nanosec_per_kmer) = 2.96341 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 536.498 +negative lookup (avg_nanosec_per_kmer) 482.527 +access (avg_nanosec_per_kmer) = 325.917 +iterator (avg_nanosec_per_kmer) = 2.90211 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 537.505 +negative lookup (avg_nanosec_per_kmer) 475.475 +access (avg_nanosec_per_kmer) = 330.188 +iterator (avg_nanosec_per_kmer) = 2.9029 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 684.323 +negative lookup (avg_nanosec_per_kmer) 510.931 +access (avg_nanosec_per_kmer) = 362.612 +iterator (avg_nanosec_per_kmer) = 2.94347 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 681.866 +negative lookup (avg_nanosec_per_kmer) 514.817 +access (avg_nanosec_per_kmer) = 358.338 +iterator (avg_nanosec_per_kmer) = 2.91352 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash +positive lookup 
(avg_nanosec_per_kmer) = 690.784 +negative lookup (avg_nanosec_per_kmer) 519.457 +access (avg_nanosec_per_kmer) = 357.355 +iterator (avg_nanosec_per_kmer) = 2.90127 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1252.98 +negative lookup (avg_nanosec_per_kmer) 682.253 +access (avg_nanosec_per_kmer) = 762.473 +iterator (avg_nanosec_per_kmer) = 2.9264 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1248.66 +negative lookup (avg_nanosec_per_kmer) 670.958 +access (avg_nanosec_per_kmer) = 748.886 +iterator (avg_nanosec_per_kmer) = 3.01197 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1253.05 +negative lookup (avg_nanosec_per_kmer) 671.44 +access (avg_nanosec_per_kmer) = 768.426 +iterator (avg_nanosec_per_kmer) = 2.94712 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1101.85 +negative lookup (avg_nanosec_per_kmer) 576.618 +access (avg_nanosec_per_kmer) = 636.389 +iterator (avg_nanosec_per_kmer) = 3.073 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1109.02 +negative lookup (avg_nanosec_per_kmer) 591.609 +access (avg_nanosec_per_kmer) = 639.914 +iterator (avg_nanosec_per_kmer) = 3.02901 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1122.06 +negative lookup (avg_nanosec_per_kmer) 576.365 +access (avg_nanosec_per_kmer) = 639.754 +iterator (avg_nanosec_per_kmer) = 3.24296 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1360.43 +negative lookup (avg_nanosec_per_kmer) 509.214 +access (avg_nanosec_per_kmer) = 460.38 +iterator (avg_nanosec_per_kmer) = 3.00318 +./sshash bench -i 
/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1367.5 +negative lookup (avg_nanosec_per_kmer) 508.937 +access (avg_nanosec_per_kmer) = 463.753 +iterator (avg_nanosec_per_kmer) = 3.01043 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1375.68 +negative lookup (avg_nanosec_per_kmer) 512.585 +access (avg_nanosec_per_kmer) = 460.319 +iterator (avg_nanosec_per_kmer) = 3.00613 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1286.92 +negative lookup (avg_nanosec_per_kmer) 505.491 +access (avg_nanosec_per_kmer) = 411.809 +iterator (avg_nanosec_per_kmer) = 2.94713 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1278.12 +negative lookup (avg_nanosec_per_kmer) 505.843 +access (avg_nanosec_per_kmer) = 416.597 +iterator (avg_nanosec_per_kmer) = 2.97571 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 1272.42 +negative lookup (avg_nanosec_per_kmer) 503.753 +access (avg_nanosec_per_kmer) = 408.845 +iterator (avg_nanosec_per_kmer) = 2.95958 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 518.747 +negative lookup (avg_nanosec_per_kmer) 437.586 +access (avg_nanosec_per_kmer) = 283.488 +iterator (avg_nanosec_per_kmer) = 2.92314 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 530.427 +negative lookup (avg_nanosec_per_kmer) 441.286 +access (avg_nanosec_per_kmer) = 282.772 +iterator (avg_nanosec_per_kmer) = 2.91375 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 518.761 +negative lookup (avg_nanosec_per_kmer) 437.309 +access (avg_nanosec_per_kmer) 
= 279.722 +iterator (avg_nanosec_per_kmer) = 2.91857 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 880.286 +negative lookup (avg_nanosec_per_kmer) 616.674 +access (avg_nanosec_per_kmer) = 583.736 +iterator (avg_nanosec_per_kmer) = 2.92574 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 879.15 +negative lookup (avg_nanosec_per_kmer) 612.251 +access (avg_nanosec_per_kmer) = 586.975 +iterator (avg_nanosec_per_kmer) = 2.95393 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash +positive lookup (avg_nanosec_per_kmer) = 883.84 +negative lookup (avg_nanosec_per_kmer) 621.5 +access (avg_nanosec_per_kmer) = 586.321 +iterator (avg_nanosec_per_kmer) = 2.92754 diff --git a/benchmarks/results-10-11-25/k63/canon-build.json b/benchmarks/results-10-11-25/k63/canon-build.json new file mode 100644 index 0000000..c6ec00c --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-build.json @@ -0,0 +1,9 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz", "k": "63", "m": "24", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1455730", "step 2 (compute minimizer tuples)": "423547", "step 3 (merging minimizer tuples)": "1932696", "step 4 (build mphf)": "1615217", "step 5 (replacing minimizer values with MPHF hashes)": "1332988", "step 6 (merging minimizers tuples)": "2395700", "step 7.1 (build sparse index)": "500158", "step 7.2 (build skew index)": "5663735", "step 7 (build sparse and skew index)": "6253775", "total_build_time_in_microsec": "15409653", "index_size_in_bytes": "345440542", "num_kmers": "556585658"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz", "k": "63", "m": "24", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "2621728", "step 2 (compute 
minimizer tuples)": "666365", "step 3 (merging minimizer tuples)": "3742543", "step 4 (build mphf)": "2931134", "step 5 (replacing minimizer values with MPHF hashes)": "2642471", "step 6 (merging minimizers tuples)": "5286761", "step 7.1 (build sparse index)": "611732", "step 7.2 (build skew index)": "1058241", "step 7 (build sparse and skew index)": "1821152", "total_build_time_in_microsec": "19712154", "index_size_in_bytes": "610013389", "num_kmers": "1155250667"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "6797908", "step 2 (compute minimizer tuples)": "1491658", "step 3 (merging minimizer tuples)": "9284711", "step 4 (build mphf)": "5937076", "step 5 (replacing minimizer values with MPHF hashes)": "6276611", "step 6 (merging minimizers tuples)": "16108392", "step 7.1 (build sparse index)": "2174004", "step 7.2 (build skew index)": "20595029", "step 7 (build sparse and skew index)": "23168795", "total_build_time_in_microsec": "69065151", "index_size_in_bytes": "1839839296", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "66380276", "step 2 (compute minimizer tuples)": "14288435", "step 3 (merging minimizer tuples)": "106961304", "step 4 (build mphf)": "83357684", "step 5 (replacing minimizer values with MPHF hashes)": "67389059", "step 6 (merging minimizers tuples)": "211314683", "step 7.1 (build sparse index)": "28935678", "step 7.2 (build skew index)": "309270405", "step 7 (build sparse and skew index)": "342019891", "total_build_time_in_microsec": "891711332", "index_size_in_bytes": "17433076635", "num_kmers": "22766770240"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", 
"num_threads": "64", "step 1 (encode strings)": "26910723", "step 2 (compute minimizer tuples)": "10917373", "step 3 (merging minimizer tuples)": "34527726", "step 4 (build mphf)": "12784700", "step 5 (replacing minimizer values with MPHF hashes)": "16993522", "step 6 (merging minimizers tuples)": "66065635", "step 7.1 (build sparse index)": "13120534", "step 7.2 (build skew index)": "70705352", "step 7 (build sparse and skew index)": "85345523", "total_build_time_in_microsec": "253545202", "index_size_in_bytes": "6028576020", "num_kmers": "5926785469"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "9911460", "step 2 (compute minimizer tuples)": "1423819", "step 3 (merging minimizer tuples)": "9860370", "step 4 (build mphf)": "3902561", "step 5 (replacing minimizer values with MPHF hashes)": "5718533", "step 6 (merging minimizers tuples)": "13647523", "step 7.1 (build sparse index)": "4042275", "step 7.2 (build skew index)": "40097912", "step 7 (build sparse and skew index)": "44663017", "total_build_time_in_microsec": "89127283", "index_size_in_bytes": "2131440134", "num_kmers": "2027656011"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "7224275", "step 2 (compute minimizer tuples)": "1070910", "step 3 (merging minimizer tuples)": "7387421", "step 4 (build mphf)": "3137436", "step 5 (replacing minimizer values with MPHF hashes)": "4263331", "step 6 (merging minimizers tuples)": "9988831", "step 7.1 (build sparse index)": "3092792", "step 7.2 (build skew index)": "21449308", "step 7 (build sparse and skew index)": "24928109", "total_build_time_in_microsec": "58000313", "index_size_in_bytes": "1481048960", "num_kmers": "1524904156"} +{"input_filename": 
"/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1117396", "step 2 (compute minimizer tuples)": "476091", "step 3 (merging minimizer tuples)": "1404115", "step 4 (build mphf)": "1250212", "step 5 (replacing minimizer values with MPHF hashes)": "1002353", "step 6 (merging minimizers tuples)": "1833045", "step 7.1 (build sparse index)": "430283", "step 7.2 (build skew index)": "52647", "step 7 (build sparse and skew index)": "554804", "total_build_time_in_microsec": "7638016", "index_size_in_bytes": "229841550", "num_kmers": "412515880"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "true", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "35644705", "step 2 (compute minimizer tuples)": "12432565", "step 3 (merging minimizer tuples)": "61124540", "step 4 (build mphf)": "56661382", "step 5 (replacing minimizer values with MPHF hashes)": "29935639", "step 6 (merging minimizers tuples)": "124814038", "step 7.1 (build sparse index)": "14713057", "step 7.2 (build skew index)": "12205092", "step 7 (build sparse and skew index)": "28991155", "total_build_time_in_microsec": "349604024", "index_size_in_bytes": "8386935913", "num_kmers": "13663610341"} diff --git a/benchmarks/results-10-11-25/k63/canon-build.log b/benchmarks/results-10-11-25/k63/canon-build.log new file mode 100644 index 0000000..f113ec8 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-build.log @@ -0,0 +1,2046 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash --canonical +2025-11-11 01:17:27: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... 
+read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +cost: 2.0 + 0.212662 [bits/kmer] +max string length = 46783 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 16 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.45573 [sec] (2.61546 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.15.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.34.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.53.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.423547 [sec] (0.760974 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.9327 [sec] (3.47241 [ns/kmer]) +num_minimizers = 29275778 +num_minimizer_positions = 34590805 +num_super_kmers = 36307176 +building minimizers MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.61522 [sec] (2.90201 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820247195652893.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 1.33299 [sec] (2.39494 [ns/kmer]) +=== step 6 (merging minimizers tuples): 2.3957 [sec] (4.30428 [ns/kmer]) +num_bits_per_offset = 30 +num_buckets_larger_than_1_not_in_skew_index 1189512/29275778 (4.06313%) +num_buckets_in_skew_index 5032/29275778 (0.0171883%) +max_bucket_size 815743 +log2_max_bucket_size 20 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 4643594/34590805 (13.4244%) +num_minimizer_positions_of_buckets_in_skew_index 1865977/34590805 (5.39443%) +=== step 7.1 (build sparse index): 0.500158 [sec] (0.898618 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 4850737 + partition = 1: num kmers in buckets of size > 128 and <= 256: 3133092 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1886121 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1113398 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 945186 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1019479 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 681501 + partition = 7: num kmers in buckets of size > 8192 and <= 815743: 5508330 +num kmers in skew index = 19137844 (3.43844%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 4850737 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 4850737 kmers; bits/key = 2.71418 + built positions[0] for 4850737 kmers; bits/key = 7.00007 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3133092 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[1] for 3133092 kmers; bits/key = 3.15324 + built positions[1] for 3133092 kmers; bits/key = 8.00011 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1886121 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1886121)... 
+ built mphs[2] for 1886121 kmers; bits/key = 2.5602 + built positions[2] for 1886121 kmers; bits/key = 9.00018 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1113398 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1113398)... + built mphs[3] for 1113398 kmers; bits/key = 2.56094 + built positions[3] for 1113398 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 945186 + building MPHF with 64 threads and 1 partitions (avg. partition size = 945186)... + built mphs[4] for 945186 kmers; bits/key = 2.56129 + built positions[4] for 945186 kmers; bits/key = 11.0003 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1019479 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1019479)... + built mphs[5] for 1019479 kmers; bits/key = 2.56115 + built positions[5] for 1019479 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 681501 + building MPHF with 64 threads and 1 partitions (avg. partition size = 681501)... + built mphs[6] for 681501 kmers; bits/key = 2.41941 + built positions[6] for 681501 kmers; bits/key = 13.0005 + lower = 8192; upper = 815743; num_bits_per_pos = 20; num_kmers_in_partition = 5508330 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 5508330 kmers; bits/key = 2.59546 + built positions[7] for 5508330 kmers; bits/key = 20.0001 +=== step 7.2 (build skew index): 5.66373 [sec] (10.1759 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 95.9197% +buckets with 2 minimizer positions = 2.25505% +buckets with 3 minimizer positions = 0.743143% +buckets with 4 minimizer positions = 0.33953% +buckets with 5 minimizer positions = 0.188914% +buckets with 6 minimizer positions = 0.116602% +buckets with 7 minimizer positions = 0.0799535% +buckets with 8 minimizer positions = 0.0565963% +buckets with 9 minimizer positions = 0.0427691% +buckets with 10 minimizer positions = 0.0324774% +buckets with 11 minimizer positions = 0.0259498% +buckets with 12 minimizer positions = 0.0206382% +buckets with 13 minimizer positions = 0.017038% +buckets with 14 minimizer positions = 0.0148143% +buckets with 15 minimizer positions = 0.0124232% +buckets with 16 minimizer positions = 0.0107973% +max_bucket_size = 815743 +=== step 7 (build sparse and skew index): 6.25378 [sec] (11.236 [ns/kmer]) +=== total time: 15.4097 [sec] (27.686 [ns/kmer]) +total index size: 345440542 [B] -- 345.441 [MB] +SPACE BREAKDOWN: + mphf: 0.151285 [bits/kmer] (2.87621 [bits/key]) -- 3.04695% + strings_offsets: 0.115799 [bits/kmer] -- 2.33224% + control_codewords: 1.63057 [bits/kmer] -- 32.8403% + mid_load_buckets: 0.250291 [bits/kmer] -- 5.04096% + begin_buckets_of_size: 3.85206e-06 [bits/kmer] -- 7.75821e-05% + strings: 2.21266 [bits/kmer] -- 44.564% + skew_index: 0.604527 [bits/kmer] -- 12.1754% + weights: 2.6447e-06 [bits/kmer] -- 5.32653e-05% + -------------- + total: 4.96514 [bits/kmer] +2025-11-11 01:17:42: saving data structure to disk... 
+2025-11-11 01:17:44: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash --canonical +2025-11-11 01:17:44: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +cost: 2.0 + 0.0167212 [bits/kmer] +max string length = 261876 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 18 +=== step 1 (encode strings): 2.62173 [sec] (2.2694 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.666365 [sec] (0.576814 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.bin' +=== step 3 (merging minimizer tuples): 3.74254 [sec] (3.23959 [ns/kmer]) +num_minimizers = 68497878 +num_minimizer_positions = 69282395 +num_super_kmers = 73080845 +building minimizers MPHF with 64 threads and 23 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.93113 [sec] (2.53723 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820264346018005.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 2.64247 [sec] (2.28736 [ns/kmer]) +=== step 6 (merging minimizers tuples): 5.28676 [sec] (4.57629 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 296941/68497878 (0.433504%) +num_buckets_in_skew_index 787/68497878 (0.00114894%) +max_bucket_size 1509 +log2_max_bucket_size 11 +num_partitions in skew index 5 +num_minimizer_positions_of_buckets_larger_than_1 973386/69282395 (1.40495%) +num_minimizer_positions_of_buckets_in_skew_index 108859/69282395 (0.157124%) +=== step 7.1 (build sparse index): 0.611732 [sec] (0.529523 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 854803 + partition = 1: num kmers in buckets of size > 128 and <= 256: 508726 + partition = 2: num kmers in buckets of size > 256 and <= 512: 353088 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 143157 + partition = 4: num kmers in buckets of size > 1024 and <= 1509: 45334 +num kmers in skew index = 1905108 (0.164909%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 854803 + building MPHF with 64 threads and 1 partitions (avg. partition size = 854803)... + built mphs[0] for 854803 kmers; bits/key = 2.5616 + built positions[0] for 854803 kmers; bits/key = 7.00044 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 508726 + building MPHF with 64 threads and 1 partitions (avg. partition size = 508726)... + built mphs[1] for 508726 kmers; bits/key = 2.42032 + built positions[1] for 508726 kmers; bits/key = 8.00066 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 353088 + building MPHF with 64 threads and 1 partitions (avg. partition size = 353088)... + built mphs[2] for 353088 kmers; bits/key = 2.56521 + built positions[2] for 353088 kmers; bits/key = 9.00091 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 143157 + building MPHF with 64 threads and 1 partitions (avg. 
partition size = 143157)... + built mphs[3] for 143157 kmers; bits/key = 2.4309 + built positions[3] for 143157 kmers; bits/key = 10.0026 + lower = 1024; upper = 1509; num_bits_per_pos = 11; num_kmers_in_partition = 45334 + building MPHF with 64 threads and 1 partitions (avg. partition size = 45334)... + built mphs[4] for 45334 kmers; bits/key = 2.46173 + built positions[4] for 45334 kmers; bits/key = 11.0074 +=== step 7.2 (build skew index): 1.05824 [sec] (0.916027 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.5653% +buckets with 2 minimizer positions = 0.296122% +buckets with 3 minimizer positions = 0.0602092% +buckets with 4 minimizer positions = 0.0251672% +buckets with 5 minimizer positions = 0.0136895% +buckets with 6 minimizer positions = 0.00842362% +buckets with 7 minimizer positions = 0.00572864% +buckets with 8 minimizer positions = 0.00416071% +buckets with 9 minimizer positions = 0.00301323% +buckets with 10 minimizer positions = 0.00248183% +buckets with 11 minimizer positions = 0.00184093% +buckets with 12 minimizer positions = 0.00158691% +buckets with 13 minimizer positions = 0.00117084% +buckets with 14 minimizer positions = 0.00105697% +buckets with 15 minimizer positions = 0.000887619% +buckets with 16 minimizer positions = 0.000756228% +max_bucket_size = 1509 +=== step 7 (build sparse and skew index): 1.82115 [sec] (1.57641 [ns/kmer]) +=== total time: 19.7122 [sec] (17.0631 [ns/kmer]) +total index size: 610013389 [B] -- 610.013 [MB] +SPACE BREAKDOWN: + mphf: 0.16806 [bits/kmer] (2.83441 [bits/key]) -- 3.97842% + strings_offsets: 0.0958242 [bits/kmer] -- 2.26841% + control_codewords: 1.89737 [bits/kmer] -- 44.9157% + mid_load_buckets: 0.0261201 [bits/kmer] -- 0.618333% + begin_buckets_of_size: 1.85587e-06 [bits/kmer] -- 4.39335e-05% + strings: 2.01672 [bits/kmer] -- 47.7411% + skew_index: 0.0201901 [bits/kmer] -- 0.477952% + weights: 1.27418e-06 [bits/kmer] -- 3.01633e-05% + -------------- + total: 4.22428 
[bits/kmer] +2025-11-11 01:18:04: saving data structure to disk... +2025-11-11 01:18:06: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash --canonical +2025-11-11 01:18:06: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 6.79791 [sec] (2.45295 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.10.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.29.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.48.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.49166 [sec] (0.538249 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.28471 [sec] (3.35029 [ns/kmer]) +num_minimizers = 149769567 +num_minimizer_positions = 173272792 +num_super_kmers = 182565576 +building minimizers MPHF with 64 threads and 50 partitions (avg. 
partition size = 3000000)... +=== step 4 (build mphf): 5.93708 [sec] (2.14233 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820286868261128.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 6.27661 [sec] (2.26485 [ns/kmer]) +=== step 6 (merging minimizers tuples): 16.1084 [sec] (5.81254 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 4082749/149769567 (2.72602%) +num_buckets_in_skew_index 35781/149769567 (0.0238907%) +max_bucket_size 284250 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 16495406/173272792 (9.51991%) +num_minimizer_positions_of_buckets_in_skew_index 11126349/173272792 (6.42129%) +=== step 7.1 (build sparse index): 2.174 [sec] (0.784466 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26742724 + partition = 1: num kmers in buckets of size > 128 and <= 256: 24475836 + partition = 2: num kmers in buckets of size > 256 and <= 512: 21113117 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 19260150 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17989259 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 15443443 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 12315344 + partition = 7: num kmers in buckets of size > 8192 and <= 284250: 28690575 +num kmers in skew index = 166030448 (5.99103%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26742724 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 26742724 kmers; bits/key = 2.56429 + built positions[0] for 26742724 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 24475836 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... 
+ built mphs[1] for 24475836 kmers; bits/key = 2.62316 + built positions[1] for 24475836 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 21113117 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... + built mphs[2] for 21113117 kmers; bits/key = 2.6904 + built positions[2] for 21113117 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 19260150 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 19260150 kmers; bits/key = 2.59757 + built positions[3] for 19260150 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17989259 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17989259 kmers; bits/key = 2.53715 + built positions[4] for 17989259 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 15443443 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[5] for 15443443 kmers; bits/key = 2.69254 + built positions[5] for 15443443 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 12315344 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[6] for 12315344 kmers; bits/key = 2.67895 + built positions[6] for 12315344 kmers; bits/key = 13 + lower = 8192; upper = 284250; num_bits_per_pos = 19; num_kmers_in_partition = 28690575 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 28690575 kmers; bits/key = 2.55295 + built positions[7] for 28690575 kmers; bits/key = 19 +=== step 7.2 (build skew index): 20.595 [sec] (7.4315 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.2501% +buckets with 2 minimizer positions = 1.59767% +buckets with 3 minimizer positions = 0.444706% +buckets with 4 minimizer positions = 0.198987% +buckets with 5 minimizer positions = 0.113029% +buckets with 6 minimizer positions = 0.07244% +buckets with 7 minimizer positions = 0.0504562% +buckets with 8 minimizer positions = 0.0370322% +buckets with 9 minimizer positions = 0.0281472% +buckets with 10 minimizer positions = 0.0224598% +buckets with 11 minimizer positions = 0.0182414% +buckets with 12 minimizer positions = 0.0149463% +buckets with 13 minimizer positions = 0.0124745% +buckets with 14 minimizer positions = 0.0107405% +buckets with 15 minimizer positions = 0.00923686% +buckets with 16 minimizer positions = 0.00803234% +max_bucket_size = 284250 +=== step 7 (build sparse and skew index): 23.1688 [sec] (8.36021 [ns/kmer]) +=== total time: 69.0652 [sec] (24.9214 [ns/kmer]) +total index size: 1839839296 [B] -- 1839.84 [MB] +SPACE BREAKDOWN: + mphf: 0.152644 [bits/kmer] (2.82451 [bits/key]) -- 2.87406% + strings_offsets: 0.11255 [bits/kmer] -- 2.11915% + control_codewords: 1.78341 [bits/kmer] -- 33.579% + mid_load_buckets: 0.19047 [bits/kmer] -- 3.58627% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.45665e-05% + strings: 2.11826 [bits/kmer] -- 39.8836% + skew_index: 0.95376 [bits/kmer] -- 17.9579% + weights: 5.31156e-07 [bits/kmer] -- 1.00009e-05% + -------------- + total: 5.31109 [bits/kmer] +2025-11-11 01:19:15: saving data structure to disk... 
+2025-11-11 01:19:24: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash --canonical +2025-11-11 01:19:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz'... +read 1000000 sequences, 367776209 bases, 305776209 kmers +read 2000000 sequences, 737985819 bases, 613985819 kmers +read 3000000 sequences, 1110904173 bases, 924904173 kmers +read 4000000 sequences, 1483811415 bases, 1235811415 kmers +read 5000000 sequences, 1859655851 bases, 1549655851 kmers +read 6000000 sequences, 2238082976 bases, 1866082976 kmers +read 7000000 sequences, 2618741364 bases, 2184741364 kmers +read 8000000 sequences, 3003367751 bases, 2507367751 kmers +read 9000000 sequences, 3393382254 bases, 2835382254 kmers +read 10000000 sequences, 3787981427 bases, 3167981427 kmers +read 11000000 sequences, 4184097126 bases, 3502097126 kmers +read 12000000 sequences, 4584570246 bases, 3840570246 kmers +read 13000000 sequences, 4992446542 bases, 4186446542 kmers +read 14000000 sequences, 5404831631 bases, 4536831631 kmers +read 15000000 sequences, 5823371753 bases, 4893371753 kmers +read 16000000 sequences, 6248734465 bases, 5256734465 kmers +read 17000000 sequences, 6682521466 bases, 5628521466 kmers +read 18000000 sequences, 7124787217 bases, 6008787217 kmers +read 19000000 sequences, 7580114109 bases, 6402114109 kmers +read 20000000 sequences, 8047294111 bases, 6807294111 kmers +read 21000000 sequences, 8529676866 bases, 7227676866 kmers +read 22000000 sequences, 9033767795 bases, 7669767795 kmers +read 23000000 sequences, 9560185658 bases, 8134185658 kmers +read 24000000 sequences, 10119240080 bases, 8631240080 kmers +read 25000000 sequences, 10718751300 bases, 9168751300 kmers +read 26000000 sequences, 11378640529 bases, 9766640529 kmers +read 27000000 sequences, 12134212218 bases, 
10460212218 kmers +read 28000000 sequences, 13067071790 bases, 11331071790 kmers +read 29000000 sequences, 13626049624 bases, 11828049624 kmers +read 30000000 sequences, 13960720037 bases, 12100720037 kmers +read 31000000 sequences, 14294970673 bases, 12372970673 kmers +read 32000000 sequences, 14628836645 bases, 12644836645 kmers +read 33000000 sequences, 14963175436 bases, 12917175436 kmers +read 34000000 sequences, 15298218879 bases, 13190218879 kmers +read 35000000 sequences, 15633699282 bases, 13463699282 kmers +read 36000000 sequences, 15967703945 bases, 13735703945 kmers +read 37000000 sequences, 16302024026 bases, 14008024026 kmers +read 38000000 sequences, 16635791931 bases, 14279791931 kmers +read 39000000 sequences, 16971526422 bases, 14553526422 kmers +read 40000000 sequences, 17307281294 bases, 14827281294 kmers +read 41000000 sequences, 17645308371 bases, 15103308371 kmers +read 42000000 sequences, 17983412927 bases, 15379412927 kmers +read 43000000 sequences, 18320303922 bases, 15654303922 kmers +read 44000000 sequences, 18658298765 bases, 15930298765 kmers +read 45000000 sequences, 18996960738 bases, 16206960738 kmers +read 46000000 sequences, 19337650486 bases, 16485650486 kmers +read 47000000 sequences, 19678229737 bases, 16764229737 kmers +read 48000000 sequences, 20021474847 bases, 17045474847 kmers +read 49000000 sequences, 20364403409 bases, 17326403409 kmers +read 50000000 sequences, 20708621449 bases, 17608621449 kmers +read 51000000 sequences, 21051850139 bases, 17889850139 kmers +read 52000000 sequences, 21397390032 bases, 18173390032 kmers +read 53000000 sequences, 21743414843 bases, 18457414843 kmers +read 54000000 sequences, 22090330124 bases, 18742330124 kmers +read 55000000 sequences, 22439410639 bases, 19029410639 kmers +read 56000000 sequences, 22787640700 bases, 19315640700 kmers +read 57000000 sequences, 23137101573 bases, 19603101573 kmers +read 58000000 sequences, 23487411623 bases, 19891411623 kmers +read 59000000 sequences, 
23839869821 bases, 20181869821 kmers +read 60000000 sequences, 24191291613 bases, 20471291613 kmers +read 61000000 sequences, 24545366070 bases, 20763366070 kmers +read 62000000 sequences, 24900738859 bases, 21056738859 kmers +read 63000000 sequences, 25257104822 bases, 21351104822 kmers +read 64000000 sequences, 25617509594 bases, 21649509594 kmers +read 65000000 sequences, 25977036607 bases, 21947036607 kmers +read 66000000 sequences, 26338121105 bases, 22246121105 kmers +read 67000000 sequences, 26701662537 bases, 22547662537 kmers +read 67725914 sequences, 26965776908 bases, 22766770240 kmers +num_kmers 22766770240 +cost: 2.0 + 0.368872 [bits/kmer] +max string length = 77847 +num bits per_absolute_offset = 35 +num bits per_relative_offset = 17 +num bits per_string_id = 27 +=== step 1 (encode strings): 66.3803 [sec] (2.91567 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.10.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.29.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.48.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.67.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.86.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.105.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.124.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.143.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.144.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.162.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.163.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.166.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.181.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.182.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.185.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.200.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.201.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.204.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.206.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.207.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.208.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.209.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.210.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.211.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.212.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.213.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.214.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.215.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.216.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.217.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.218.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.219.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.220.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.221.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.222.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.223.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.224.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.225.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.226.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.227.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.228.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.229.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.230.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.231.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.232.bin'... 
+=== step 2 (compute minimizer tuples): 14.2884 [sec] (0.6276 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 106.961 [sec] (4.69813 [ns/kmer]) +num_minimizers = 1050811069 +num_minimizer_positions = 1468555314 +num_super_kmers = 1544327439 +building minimizers MPHF with 64 threads and 351 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 83.3577 [sec] (3.66138 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.3.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 67.3891 [sec] (2.95997 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762820364727096833.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +processed 1300000000 minimizer tuples +processed 1400000000 minimizer tuples +processed 1500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 211.315 [sec] (9.28172 [ns/kmer]) +num_bits_per_offset = 35 +num_buckets_larger_than_1_not_in_skew_index 88763229/1050811069 (8.44712%) +num_buckets_in_skew_index 646454/1050811069 (0.0615195%) +max_bucket_size 180205 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 369499242/1468555314 (25.1607%) +num_minimizer_positions_of_buckets_in_skew_index 137654686/1468555314 (9.37348%) +=== step 7.1 (build sparse index): 28.9357 [sec] (1.27096 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 482180906 + partition = 1: num kmers in buckets of size > 128 and <= 256: 408452161 + partition = 2: num kmers in buckets of size > 256 and <= 512: 342999108 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 281650651 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 203247810 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 129300467 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 61963038 + partition = 7: num kmers in buckets of size > 8192 and <= 180205: 61253732 +num kmers in skew index = 1971047873 (8.65756%) + lower = 64; upper = 128; 
num_bits_per_pos = 7; num_kmers_in_partition = 482180906 + building MPHF with 64 threads and 161 partitions (avg. partition size = 3000000)... + built mphs[0] for 482180906 kmers; bits/key = 2.55443 + built positions[0] for 482180906 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 408452161 + building MPHF with 64 threads and 137 partitions (avg. partition size = 3000000)... + built mphs[1] for 408452161 kmers; bits/key = 2.55416 + built positions[1] for 408452161 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 342999108 + building MPHF with 64 threads and 115 partitions (avg. partition size = 3000000)... + built mphs[2] for 342999108 kmers; bits/key = 2.55469 + built positions[2] for 342999108 kmers; bits/key = 9 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 281650651 + building MPHF with 64 threads and 94 partitions (avg. partition size = 3000000)... + built mphs[3] for 281650651 kmers; bits/key = 2.55625 + built positions[3] for 281650651 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 203247810 + building MPHF with 64 threads and 68 partitions (avg. partition size = 3000000)... + built mphs[4] for 203247810 kmers; bits/key = 2.55497 + built positions[4] for 203247810 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 129300467 + building MPHF with 64 threads and 44 partitions (avg. partition size = 3000000)... + built mphs[5] for 129300467 kmers; bits/key = 2.56136 + built positions[5] for 129300467 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 61963038 + building MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... 
+ built mphs[6] for 61963038 kmers; bits/key = 2.56789 + built positions[6] for 61963038 kmers; bits/key = 13 + lower = 8192; upper = 180205; num_bits_per_pos = 18; num_kmers_in_partition = 61253732 + building MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... + built mphs[7] for 61253732 kmers; bits/key = 2.52984 + built positions[7] for 61253732 kmers; bits/key = 18 +=== step 7.2 (build skew index): 309.27 [sec] (13.5843 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 91.4914% +buckets with 2 minimizer positions = 4.74025% +buckets with 3 minimizer positions = 1.38774% +buckets with 4 minimizer positions = 0.653773% +buckets with 5 minimizer positions = 0.38509% +buckets with 6 minimizer positions = 0.252845% +buckets with 7 minimizer positions = 0.177906% +buckets with 8 minimizer positions = 0.131797% +buckets with 9 minimizer positions = 0.101004% +buckets with 10 minimizer positions = 0.0796625% +buckets with 11 minimizer positions = 0.0644186% +buckets with 12 minimizer positions = 0.0530856% +buckets with 13 minimizer positions = 0.0443078% +buckets with 14 minimizer positions = 0.0373937% +buckets with 15 minimizer positions = 0.0321165% +buckets with 16 minimizer positions = 0.0277512% +max_bucket_size = 180205 +=== step 7 (build sparse and skew index): 342.02 [sec] (15.0228 [ns/kmer]) +=== total time: 891.711 [sec] (39.1672 [ns/kmer]) +total index size: 17433076635 [B] -- 17433.1 [MB] +SPACE BREAKDOWN: + mphf: 0.130611 [bits/kmer] (2.8298 [bits/key]) -- 2.13215% + strings_offsets: 0.162621 [bits/kmer] -- 2.65469% + control_codewords: 1.6616 [bits/kmer] -- 27.1246% + mid_load_buckets: 0.568042 [bits/kmer] -- 9.27294% + begin_buckets_of_size: 9.41723e-08 [bits/kmer] -- 1.53731e-06% + strings: 2.36887 [bits/kmer] -- 38.6704% + skew_index: 1.23405 [bits/kmer] -- 20.1452% + weights: 6.46556e-08 [bits/kmer] -- 1.05546e-06% + -------------- + total: 6.1258 [bits/kmer] +2025-11-11 01:34:16: saving data 
structure to disk... +2025-11-11 01:35:40: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash --canonical +2025-11-11 01:35:41: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... +read 1000000 sequences, 123618042 bases, 61618042 kmers +read 2000000 sequences, 248006699 bases, 124006699 kmers +read 3000000 sequences, 373165849 bases, 187165849 kmers +read 4000000 sequences, 499216806 bases, 251216806 kmers +read 5000000 sequences, 626504126 bases, 316504126 kmers +read 6000000 sequences, 754844978 bases, 382844978 kmers +read 7000000 sequences, 884611558 bases, 450611558 kmers +read 8000000 sequences, 1015719906 bases, 519719906 kmers +read 9000000 sequences, 1148384128 bases, 590384128 kmers +read 10000000 sequences, 1283305186 bases, 663305186 kmers +read 11000000 sequences, 1420392618 bases, 738392618 kmers +read 12000000 sequences, 1559905103 bases, 815905103 kmers +read 13000000 sequences, 1702344045 bases, 896344045 kmers +read 14000000 sequences, 1848153905 bases, 980153905 kmers +read 15000000 sequences, 1998487611 bases, 1068487611 kmers +read 16000000 sequences, 2153589528 bases, 1161589528 kmers +read 17000000 sequences, 2314472162 bases, 1260472162 kmers +read 18000000 sequences, 2483331066 bases, 1367331066 kmers +read 19000000 sequences, 2661730312 bases, 1483730312 kmers +read 20000000 sequences, 2852409810 bases, 1612409810 kmers +read 21000000 sequences, 3060194564 bases, 1758194564 kmers +read 22000000 sequences, 3290140238 bases, 1926140238 kmers +read 23000000 sequences, 3552570970 bases, 2126570970 kmers +read 24000000 sequences, 3863888905 bases, 2375888905 kmers +read 25000000 sequences, 4253358029 bases, 2703358029 kmers +read 26000000 sequences, 4780487647 bases, 3168487647 kmers +read 27000000 sequences, 5604484526 bases, 3930484526 kmers 
+read 28000000 sequences, 5925952935 bases, 4189952935 kmers +read 29000000 sequences, 6039783917 bases, 4241783917 kmers +read 30000000 sequences, 6153634902 bases, 4293634902 kmers +read 31000000 sequences, 6267684053 bases, 4345684053 kmers +read 32000000 sequences, 6381788267 bases, 4397788267 kmers +read 33000000 sequences, 6496092541 bases, 4450092541 kmers +read 34000000 sequences, 6610456809 bases, 4502456809 kmers +read 35000000 sequences, 6725025608 bases, 4555025608 kmers +read 36000000 sequences, 6839697388 bases, 4607697388 kmers +read 37000000 sequences, 6954566139 bases, 4660566139 kmers +read 38000000 sequences, 7069620814 bases, 4713620814 kmers +read 39000000 sequences, 7184856392 bases, 4766856392 kmers +read 40000000 sequences, 7300352498 bases, 4820352498 kmers +read 41000000 sequences, 7415987203 bases, 4873987203 kmers +read 42000000 sequences, 7531875755 bases, 4927875755 kmers +read 43000000 sequences, 7647987237 bases, 4981987237 kmers +read 44000000 sequences, 7764325565 bases, 5036325565 kmers +read 45000000 sequences, 7880919196 bases, 5090919196 kmers +read 46000000 sequences, 7997748943 bases, 5145748943 kmers +read 47000000 sequences, 8114852221 bases, 5200852221 kmers +read 48000000 sequences, 8232292777 bases, 5256292777 kmers +read 49000000 sequences, 8349993383 bases, 5311993383 kmers +read 50000000 sequences, 8468086161 bases, 5368086161 kmers +read 51000000 sequences, 8586456588 bases, 5424456588 kmers +read 52000000 sequences, 8705279881 bases, 5481279881 kmers +read 53000000 sequences, 8824571697 bases, 5538571697 kmers +read 54000000 sequences, 8944259928 bases, 5596259928 kmers +read 55000000 sequences, 9064361649 bases, 5654361649 kmers +read 56000000 sequences, 9185024212 bases, 5713024212 kmers +read 57000000 sequences, 9306137968 bases, 5772137968 kmers +read 58000000 sequences, 9427875971 bases, 5831875971 kmers +read 59000000 sequences, 9550182119 bases, 5892182119 kmers +read 59568965 sequences, 9620061299 bases, 
5926785469 kmers +num_kmers 5926785469 +cost: 2.0 + 1.2463 [bits/kmer] +max string length = 27681 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 26.9107 [sec] (4.54053 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.16.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.35.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.54.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.73.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.90.bin'... 
+=== step 2 (compute minimizer tuples): 10.9174 [sec] (1.84204 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 3 (merging minimizer tuples): 34.5277 [sec] (5.82571 [ns/kmer]) +num_minimizers = 295344565 +num_minimizer_positions = 485764487 +num_super_kmers = 507036670 +building minimizers MPHF with 64 threads and 99 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 12.7847 [sec] (2.15711 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 16.9935 [sec] (2.86724 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821341953482565.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +=== step 6 (merging minimizers tuples): 66.0656 [sec] (11.147 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 76095635/295344565 (25.765%) +num_buckets_in_skew_index 164193/295344565 (0.0555937%) +max_bucket_size 265182 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 218504008/485764487 (44.9815%) +num_minimizer_positions_of_buckets_in_skew_index 48175742/485764487 (9.91751%) +=== step 7.1 (build sparse index): 13.1205 [sec] (2.21377 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 91237805 + partition = 1: num kmers in buckets of size > 128 and <= 256: 81089595 + partition = 2: num kmers in buckets of size > 
256 and <= 512: 71244043 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 64999827 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 55340829 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 44026748 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 33883659 + partition = 7: num kmers in buckets of size > 8192 and <= 265182: 85368523 +num kmers in skew index = 527191029 (8.89506%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 91237805 + building MPHF with 64 threads and 31 partitions (avg. partition size = 3000000)... + built mphs[0] for 91237805 kmers; bits/key = 2.54472 + built positions[0] for 91237805 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 81089595 + building MPHF with 64 threads and 28 partitions (avg. partition size = 3000000)... + built mphs[1] for 81089595 kmers; bits/key = 2.56263 + built positions[1] for 81089595 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 71244043 + building MPHF with 64 threads and 24 partitions (avg. partition size = 3000000)... + built mphs[2] for 71244043 kmers; bits/key = 2.56439 + built positions[2] for 71244043 kmers; bits/key = 9 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 64999827 + building MPHF with 64 threads and 22 partitions (avg. partition size = 3000000)... + built mphs[3] for 64999827 kmers; bits/key = 2.54652 + built positions[3] for 64999827 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 55340829 + building MPHF with 64 threads and 19 partitions (avg. partition size = 3000000)... + built mphs[4] for 55340829 kmers; bits/key = 2.53877 + built positions[4] for 55340829 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 44026748 + building MPHF with 64 threads and 15 partitions (avg. 
partition size = 3000000)... + built mphs[5] for 44026748 kmers; bits/key = 2.53894 + built positions[5] for 44026748 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 33883659 + building MPHF with 64 threads and 12 partitions (avg. partition size = 3000000)... + built mphs[6] for 33883659 kmers; bits/key = 2.5671 + built positions[6] for 33883659 kmers; bits/key = 13 + lower = 8192; upper = 265182; num_bits_per_pos = 19; num_kmers_in_partition = 85368523 + building MPHF with 64 threads and 29 partitions (avg. partition size = 3000000)... + built mphs[7] for 85368523 kmers; bits/key = 2.56049 + built positions[7] for 85368523 kmers; bits/key = 19 +=== step 7.2 (build skew index): 70.7054 [sec] (11.9298 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 74.1794% +buckets with 2 minimizer positions = 17.5493% +buckets with 3 minimizer positions = 4.9171% +buckets with 4 minimizer positions = 1.47166% +buckets with 5 minimizer positions = 0.573611% +buckets with 6 minimizer positions = 0.301038% +buckets with 7 minimizer positions = 0.190827% +buckets with 8 minimizer positions = 0.133018% +buckets with 9 minimizer positions = 0.0984907% +buckets with 10 minimizer positions = 0.0753777% +buckets with 11 minimizer positions = 0.0590937% +buckets with 12 minimizer positions = 0.0475793% +buckets with 13 minimizer positions = 0.0389636% +buckets with 14 minimizer positions = 0.0324661% +buckets with 15 minimizer positions = 0.0273305% +buckets with 16 minimizer positions = 0.0232809% +max_bucket_size = 265182 +=== step 7 (build sparse and skew index): 85.3455 [sec] (14.4 [ns/kmer]) +=== total time: 253.545 [sec] (42.7795 [ns/kmer]) +total index size: 6028576020 [B] -- 6028.58 [MB] +SPACE BREAKDOWN: + mphf: 0.141443 [bits/kmer] (2.83839 [bits/key]) -- 1.73819% + strings_offsets: 0.273643 [bits/kmer] -- 3.36278% + control_codewords: 1.74413 [bits/kmer] -- 21.4335% + mid_load_buckets: 1.25348 
[bits/kmer] -- 15.404% + begin_buckets_of_size: 3.61748e-07 [bits/kmer] -- 4.44549e-06% + strings: 3.2463 [bits/kmer] -- 39.8936% + skew_index: 1.4784 [bits/kmer] -- 18.168% + weights: 2.48364e-07 [bits/kmer] -- 3.05213e-06% + -------------- + total: 8.1374 [bits/kmer] +2025-11-11 01:39:55: saving data structure to disk... +2025-11-11 01:40:22: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash --canonical +2025-11-11 01:40:22: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz'... +read 1000000 sequences, 146366718 bases, 84366718 kmers +read 2000000 sequences, 296791327 bases, 172791327 kmers +read 3000000 sequences, 452465563 bases, 266465563 kmers +read 4000000 sequences, 614198433 bases, 366198433 kmers +read 5000000 sequences, 783034539 bases, 473034539 kmers +read 6000000 sequences, 966455345 bases, 594455345 kmers +read 7000000 sequences, 1177950483 bases, 743950483 kmers +read 8000000 sequences, 1408450282 bases, 912450282 kmers +read 9000000 sequences, 1536152549 bases, 978152549 kmers +read 10000000 sequences, 1664904910 bases, 1044904910 kmers +read 11000000 sequences, 1794061925 bases, 1112061925 kmers +read 12000000 sequences, 1923575230 bases, 1179575230 kmers +read 13000000 sequences, 2053070210 bases, 1247070210 kmers +read 14000000 sequences, 2183943636 bases, 1315943636 kmers +read 15000000 sequences, 2316373175 bases, 1386373175 kmers +read 16000000 sequences, 2449041041 bases, 1457041041 kmers +read 17000000 sequences, 2582562981 bases, 1528562981 kmers +read 18000000 sequences, 2717390529 bases, 1601390529 kmers +read 19000000 sequences, 2853100531 bases, 1675100531 kmers +read 20000000 sequences, 2990606476 bases, 1750606476 kmers +read 21000000 sequences, 3129300415 bases, 1827300415 kmers +read 22000000 sequences, 3269949888 bases, 1905949888 kmers 
+read 23000000 sequences, 3412658016 bases, 1986658016 kmers +read 23474327 sequences, 3483064285 bases, 2027656011 kmers +num_kmers 2027656011 +cost: 2.0 + 1.43556 [bits/kmer] +max string length = 643923 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 20 +num bits per_string_id = 25 +=== step 1 (encode strings): 9.91146 [sec] (4.88814 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.15.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.34.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.53.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.42382 [sec] (0.702199 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 9.86037 [sec] (4.86294 [ns/kmer]) +num_minimizers = 86631941 +num_minimizer_positions = 169264838 +num_super_kmers = 176725324 +building minimizers MPHF with 64 threads and 29 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.90256 [sec] (1.92467 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821622787195254.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 5.71853 [sec] (2.82027 [ns/kmer]) +=== step 6 (merging minimizers tuples): 13.6475 [sec] (6.73069 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 14311872/86631941 (16.5203%) +num_buckets_in_skew_index 167883/86631941 (0.193789%) +max_bucket_size 475227 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 74713236/169264838 (44.1398%) +num_minimizer_positions_of_buckets_in_skew_index 22399416/169264838 (13.2334%) +=== step 7.1 (build sparse index): 4.04228 [sec] (1.99357 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 164308993 + partition = 1: num kmers in buckets of size > 128 and <= 256: 48058096 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13657562 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6137831 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4414714 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 4433156 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 4082117 + partition = 7: num kmers in buckets of size > 8192 and <= 475227: 35954358 +num kmers in skew index = 281046827 (13.8607%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 164308993 + building MPHF with 64 threads and 55 partitions (avg. partition size = 3000000)... + built mphs[0] for 164308993 kmers; bits/key = 2.56349 + built positions[0] for 164308993 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 48058096 + building MPHF with 64 threads and 17 partitions (avg. partition size = 3000000)... + built mphs[1] for 48058096 kmers; bits/key = 2.53932 + built positions[1] for 48058096 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13657562 + building MPHF with 64 threads and 5 partitions (avg. 
partition size = 3000000)... + built mphs[2] for 13657562 kmers; bits/key = 2.61351 + built positions[2] for 13657562 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6137831 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6137831 kmers; bits/key = 2.93085 + built positions[3] for 6137831 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4414714 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4414714 kmers; bits/key = 2.84407 + built positions[4] for 4414714 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 4433156 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 4433156 kmers; bits/key = 2.73731 + built positions[5] for 4433156 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 4082117 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 4082117 kmers; bits/key = 2.9369 + built positions[6] for 4082117 kmers; bits/key = 13.0001 + lower = 8192; upper = 475227; num_bits_per_pos = 19; num_kmers_in_partition = 35954358 + building MPHF with 64 threads and 12 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 35954358 kmers; bits/key = 2.55049 + built positions[7] for 35954358 kmers; bits/key = 19 +=== step 7.2 (build skew index): 40.0979 [sec] (19.7755 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.2859% +buckets with 2 minimizer positions = 8.38918% +buckets with 3 minimizer positions = 2.77772% +buckets with 4 minimizer positions = 1.31798% +buckets with 5 minimizer positions = 0.776307% +buckets with 6 minimizer positions = 0.518707% +buckets with 7 minimizer positions = 0.374551% +buckets with 8 minimizer positions = 0.285226% +buckets with 9 minimizer positions = 0.226981% +buckets with 10 minimizer positions = 0.183834% +buckets with 11 minimizer positions = 0.15397% +buckets with 12 minimizer positions = 0.131367% +buckets with 13 minimizer positions = 0.113066% +buckets with 14 minimizer positions = 0.0987326% +buckets with 15 minimizer positions = 0.0868144% +buckets with 16 minimizer positions = 0.0771828% +max_bucket_size = 475227 +=== step 7 (build sparse and skew index): 44.663 [sec] (22.0269 [ns/kmer]) +=== total time: 89.1273 [sec] (43.9558 [ns/kmer]) +total index size: 2131440134 [B] -- 2131.44 [MB] +SPACE BREAKDOWN: + mphf: 0.120785 [bits/kmer] (2.82702 [bits/key]) -- 1.43629% + strings_offsets: 0.291878 [bits/kmer] -- 3.47083% + control_codewords: 1.40993 [bits/kmer] -- 16.766% + mid_load_buckets: 1.17911 [bits/kmer] -- 14.0212% + begin_buckets_of_size: 1.05738e-06 [bits/kmer] -- 1.25737e-05% + strings: 3.43556 [bits/kmer] -- 40.8534% + skew_index: 1.97221 [bits/kmer] -- 23.4523% + weights: 7.25961e-07 [bits/kmer] -- 8.63266e-06% + -------------- + total: 8.40947 [bits/kmer] +2025-11-11 01:41:51: saving data structure to disk... 
+2025-11-11 01:42:01: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash --canonical +2025-11-11 01:42:01: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.22427 [sec] (4.73753 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.2.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.21.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.40.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.59.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.07091 [sec] (0.70228 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.38742 [sec] (4.84451 [ns/kmer]) +num_minimizers = 69577229 +num_minimizer_positions = 126350163 +num_super_kmers = 131966527 +building minimizers MPHF with 64 threads and 24 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.13744 [sec] (2.05746 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821721801375994.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 4.26333 [sec] (2.7958 [ns/kmer]) +=== step 6 (merging minimizers tuples): 9.98883 [sec] (6.55046 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 12190335/69577229 (17.5206%) +num_buckets_in_skew_index 86973/69577229 (0.125002%) +max_bucket_size 245177 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 57511599/126350163 (45.5176%) +num_minimizer_positions_of_buckets_in_skew_index 11538643/126350163 (9.13227%) +=== step 7.1 (build sparse index): 3.09279 [sec] (2.02819 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 77399356 + partition = 1: num kmers in buckets of size > 128 and <= 256: 26776297 + partition = 2: num kmers in buckets of size > 256 and <= 512: 13974034 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 6504754 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 3263868 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2527517 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2420664 + partition = 7: num kmers in buckets of size > 8192 and <= 245177: 10756294 +num kmers in skew index = 143622784 (9.41848%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 77399356 + building MPHF with 64 threads and 26 partitions (avg. partition size = 3000000)... + built mphs[0] for 77399356 kmers; bits/key = 2.54863 + built positions[0] for 77399356 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26776297 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[1] for 26776297 kmers; bits/key = 2.54559 + built positions[1] for 26776297 kmers; bits/key = 8.00001 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 13974034 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... 
+ built mphs[2] for 13974034 kmers; bits/key = 2.56375 + built positions[2] for 13974034 kmers; bits/key = 9.00003 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 6504754 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[3] for 6504754 kmers; bits/key = 2.78902 + built positions[3] for 6504754 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 3263868 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 3263868 kmers; bits/key = 3.30619 + built positions[4] for 3263868 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2527517 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2527517)... + built mphs[5] for 2527517 kmers; bits/key = 2.55992 + built positions[5] for 2527517 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2420664 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2420664)... + built mphs[6] for 2420664 kmers; bits/key = 2.55997 + built positions[6] for 2420664 kmers; bits/key = 13.0001 + lower = 8192; upper = 245177; num_bits_per_pos = 18; num_kmers_in_partition = 10756294 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 10756294 kmers; bits/key = 2.64819 + built positions[7] for 10756294 kmers; bits/key = 18 +=== step 7.2 (build skew index): 21.4493 [sec] (14.066 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 82.3544% +buckets with 2 minimizer positions = 9.17598% +buckets with 3 minimizer positions = 3.08546% +buckets with 4 minimizer positions = 1.44294% +buckets with 5 minimizer positions = 0.830594% +buckets with 6 minimizer positions = 0.537643% +buckets with 7 minimizer positions = 0.377841% +buckets with 8 minimizer positions = 0.278926% +buckets with 9 minimizer positions = 0.215933% +buckets with 10 minimizer positions = 0.171191% +buckets with 11 minimizer positions = 0.139152% +buckets with 12 minimizer positions = 0.116435% +buckets with 13 minimizer positions = 0.0976368% +buckets with 14 minimizer positions = 0.0839542% +buckets with 15 minimizer positions = 0.0727882% +buckets with 16 minimizer positions = 0.064557% +max_bucket_size = 245177 +=== step 7 (build sparse and skew index): 24.9281 [sec] (16.3473 [ns/kmer]) +=== total time: 58.0003 [sec] (38.0354 [ns/kmer]) +total index size: 1481048960 [B] -- 1481.05 [MB] +SPACE BREAKDOWN: + mphf: 0.132337 [bits/kmer] (2.9004 [bits/key]) -- 1.7032% + strings_offsets: 0.274587 [bits/kmer] -- 3.53397% + control_codewords: 1.5057 [bits/kmer] -- 19.3786% + mid_load_buckets: 1.20688 [bits/kmer] -- 15.5327% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 1.80953e-05% + strings: 3.35283 [bits/kmer] -- 43.1513% + skew_index: 1.2976 [bits/kmer] -- 16.7002% + weights: 9.65307e-07 [bits/kmer] -- 1.24236e-05% + -------------- + total: 7.76993 [bits/kmer] +2025-11-11 01:42:59: saving data structure to disk... 
+2025-11-11 01:43:06: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash --canonical +2025-11-11 01:43:06: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +cost: 2.0 + 0.240545 [bits/kmer] +max string length = 490374 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 19 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.1174 [sec] (2.70873 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.476091 [sec] (1.15412 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.40412 [sec] (3.40378 [ns/kmer]) +num_minimizers = 21966429 +num_minimizer_positions = 25033260 +num_super_kmers = 26367160 +building minimizers MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.25021 [sec] (3.0307 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821786607159693.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 1.00235 [sec] (2.42985 [ns/kmer]) +=== step 6 (merging minimizers tuples): 1.83305 [sec] (4.44357 [ns/kmer]) +num_bits_per_offset = 29 +num_buckets_larger_than_1_not_in_skew_index 1786727/21966429 (8.1339%) +num_buckets_in_skew_index 26/21966429 (0.000118362%) +max_bucket_size 442 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 4850053/25033260 (19.3744%) +num_minimizer_positions_of_buckets_in_skew_index 3531/25033260 (0.0141052%) +=== step 7.1 (build sparse index): 0.430283 [sec] (1.04307 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 26291 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10791 + partition = 2: num kmers in buckets of size > 256 and <= 442: 19798 +num kmers in skew index = 56880 (0.0137886%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 26291 + building MPHF with 64 threads and 1 partitions (avg. partition size = 26291)... + built mphs[0] for 26291 kmers; bits/key = 2.35214 + built positions[0] for 26291 kmers; bits/key = 7.0132 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10791 + building MPHF with 64 threads and 1 partitions (avg. partition size = 10791)... + built mphs[1] for 10791 kmers; bits/key = 2.61255 + built positions[1] for 10791 kmers; bits/key = 8.0304 + lower = 256; upper = 442; num_bits_per_pos = 9; num_kmers_in_partition = 19798 + building MPHF with 64 threads and 1 partitions (avg. partition size = 19798)... 
+ built mphs[2] for 19798 kmers; bits/key = 2.5247 + built positions[2] for 19798 kmers; bits/key = 9.01909 +=== step 7.2 (build skew index): 0.052647 [sec] (0.127624 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 91.866% +buckets with 2 minimizer positions = 5.49617% +buckets with 3 minimizer positions = 1.46125% +buckets with 4 minimizer positions = 0.538881% +buckets with 5 minimizer positions = 0.252859% +buckets with 6 minimizer positions = 0.135052% +buckets with 7 minimizer positions = 0.0790661% +buckets with 8 minimizer positions = 0.0489884% +buckets with 9 minimizer positions = 0.031712% +buckets with 10 minimizer positions = 0.0217832% +buckets with 11 minimizer positions = 0.0157968% +buckets with 12 minimizer positions = 0.0113628% +buckets with 13 minimizer positions = 0.00857672% +buckets with 14 minimizer positions = 0.00628231% +buckets with 15 minimizer positions = 0.00510779% +buckets with 16 minimizer positions = 0.00396969% +max_bucket_size = 442 +=== step 7 (build sparse and skew index): 0.554804 [sec] (1.34493 [ns/kmer]) +=== total time: 7.63802 [sec] (18.5157 [ns/kmer]) +total index size: 229841550 [B] -- 229.842 [MB] +SPACE BREAKDOWN: + mphf: 0.157467 [bits/kmer] (2.95713 [bits/key]) -- 3.53273% + strings_offsets: 0.119202 [bits/kmer] -- 2.67427% + control_codewords: 1.5975 [bits/kmer] -- 35.8395% + mid_load_buckets: 0.340961 [bits/kmer] -- 7.64939% + begin_buckets_of_size: 5.19738e-06 [bits/kmer] -- 0.000116602% + strings: 2.24055 [bits/kmer] -- 50.2662% + skew_index: 0.00167864 [bits/kmer] -- 0.0376599% + weights: 3.56835e-06 [bits/kmer] -- 8.00552e-05% + -------------- + total: 4.45736 [bits/kmer] +2025-11-11 01:43:14: saving data structure to disk... 
+2025-11-11 01:43:15: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash --canonical +2025-11-11 01:43:15: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz'... +read 1000000 sequences, 508802169 bases, 446802169 kmers +read 2000000 sequences, 1018081275 bases, 894081275 kmers +read 3000000 sequences, 1530401896 bases, 1344401896 kmers +read 4000000 sequences, 2055069016 bases, 1807069016 kmers +read 5000000 sequences, 2593224248 bases, 2283224248 kmers +read 6000000 sequences, 3139549385 bases, 2767549385 kmers +read 7000000 sequences, 3701263851 bases, 3267263851 kmers +read 8000000 sequences, 4283309251 bases, 3787309251 kmers +read 9000000 sequences, 4887570899 bases, 4329570899 kmers +read 10000000 sequences, 5529157849 bases, 4909157849 kmers +read 11000000 sequences, 6209792203 bases, 5527792203 kmers +read 12000000 sequences, 6962139753 bases, 6218139753 kmers +read 13000000 sequences, 7835035276 bases, 7029035276 kmers +read 14000000 sequences, 8978632454 bases, 8110632454 kmers +read 15000000 sequences, 11212240633 bases, 10282240633 kmers +read 16000000 sequences, 11682448788 bases, 10690448788 kmers +read 17000000 sequences, 12155568466 bases, 11101568466 kmers +read 18000000 sequences, 12632855540 bases, 11516855540 kmers +read 19000000 sequences, 13118014015 bases, 11940014015 kmers +read 20000000 sequences, 13600052606 bases, 12360052606 kmers +read 21000000 sequences, 14089576602 bases, 12787576602 kmers +read 22000000 sequences, 14580292633 bases, 13216292633 kmers +read 23000000 sequences, 15076414826 bases, 13650414826 kmers +read 23030730 sequences, 15091515601 bases, 13663610341 kmers +num_kmers 13663610341 +cost: 2.0 + 0.209008 [bits/kmer] +max string length = 980757 +num bits per_absolute_offset = 34 +num 
bits per_relative_offset = 20 +num bits per_string_id = 25 +=== step 1 (encode strings): 35.6447 [sec] (2.60873 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.17.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.36.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.55.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.74.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.93.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.112.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.131.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.143.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.144.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.150.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.155.bin'... +=== step 2 (compute minimizer tuples): 12.4326 [sec] (0.909903 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +=== step 3 (merging minimizer tuples): 61.1245 [sec] (4.47353 [ns/kmer]) +num_minimizers = 756254336 +num_minimizer_positions = 865984712 +num_super_kmers = 912355628 +building minimizers MPHF with 64 threads and 253 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 56.6614 [sec] (4.14688 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.1.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 29.9356 [sec] (2.1909 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762821795536487276.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +=== step 6 (merging minimizers tuples): 124.814 [sec] (9.13478 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 57738877/756254336 (7.63485%) +num_buckets_in_skew_index 33571/756254336 (0.00443912%) +max_bucket_size 102709 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 162929131/865984712 (18.8143%) +num_minimizer_positions_of_buckets_in_skew_index 4573693/865984712 (0.528149%) +=== step 7.1 (build sparse index): 14.7131 [sec] (1.07681 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 31990844 + partition = 1: num kmers in buckets of size > 128 and <= 256: 17895678 + partition = 2: num kmers in buckets of size > 256 and <= 512: 8738611 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3891800 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1940263 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 1017382 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 1038043 + partition = 7: num kmers in buckets of size > 8192 and <= 102709: 1724288 +num kmers in skew index = 68236909 (0.499406%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 31990844 + building MPHF with 64 threads and 11 partitions (avg. partition size = 3000000)... 
+ built mphs[0] for 31990844 kmers; bits/key = 2.53354 + built positions[0] for 31990844 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 17895678 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 17895678 kmers; bits/key = 2.5722 + built positions[1] for 17895678 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8738611 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8738611 kmers; bits/key = 2.62382 + built positions[2] for 8738611 kmers; bits/key = 9.00004 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3891800 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3891800 kmers; bits/key = 2.95004 + built positions[3] for 3891800 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1940263 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1940263)... + built mphs[4] for 1940263 kmers; bits/key = 2.56017 + built positions[4] for 1940263 kmers; bits/key = 11.0002 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1017382 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1017382)... + built mphs[5] for 1017382 kmers; bits/key = 2.56121 + built positions[5] for 1017382 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 1038043 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1038043)... + built mphs[6] for 1038043 kmers; bits/key = 2.56105 + built positions[6] for 1038043 kmers; bits/key = 13.0003 + lower = 8192; upper = 102709; num_bits_per_pos = 17; num_kmers_in_partition = 1724288 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1724288)... 
+ built mphs[7] for 1724288 kmers; bits/key = 2.56031 + built positions[7] for 1724288 kmers; bits/key = 17.0002 +=== step 7.2 (build skew index): 12.2051 [sec] (0.893255 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 92.3607% +buckets with 2 minimizer positions = 5.15375% +buckets with 3 minimizer positions = 1.32523% +buckets with 4 minimizer positions = 0.506715% +buckets with 5 minimizer positions = 0.246927% +buckets with 6 minimizer positions = 0.133097% +buckets with 7 minimizer positions = 0.0770936% +buckets with 8 minimizer positions = 0.0472016% +buckets with 9 minimizer positions = 0.0306497% +buckets with 10 minimizer positions = 0.02095% +buckets with 11 minimizer positions = 0.0151124% +buckets with 12 minimizer positions = 0.0112313% +buckets with 13 minimizer positions = 0.00871651% +buckets with 14 minimizer positions = 0.00703467% +buckets with 15 minimizer positions = 0.00577491% +buckets with 16 minimizer positions = 0.00485895% +max_bucket_size = 102709 +=== step 7 (build sparse and skew index): 28.9912 [sec] (2.12178 [ns/kmer]) +=== total time: 349.604 [sec] (25.5865 [ns/kmer]) +total index size: 8386935913 [B] -- 8386.94 [MB] +SPACE BREAKDOWN: + mphf: 0.156651 [bits/kmer] (2.83029 [bits/key]) -- 3.19011% + strings_offsets: 0.136923 [bits/kmer] -- 2.78836% + control_codewords: 1.93718 [bits/kmer] -- 39.4496% + mid_load_buckets: 0.405427 [bits/kmer] -- 8.25628% + begin_buckets_of_size: 1.56913e-07 [bits/kmer] -- 3.19545e-06% + strings: 2.20901 [bits/kmer] -- 44.9852% + skew_index: 0.0653321 [bits/kmer] -- 1.33045% + weights: 1.07731e-07 [bits/kmer] -- 2.19389e-06% + -------------- + total: 4.91052 [bits/kmer] +2025-11-11 01:49:05: saving data structure to disk... 
+2025-11-11 01:49:41: DONE diff --git a/benchmarks/results-10-11-25/k63/canon-build.time.log b/benchmarks/results-10-11-25/k63/canon-build.time.log new file mode 100644 index 0000000..c29e6cb --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-build.time.log @@ -0,0 +1,207 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash --canonical" + User time (seconds): 29.79 + System time (seconds): 6.14 + Percent of CPU this job got: 209% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:17.14 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1825648 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1509711 + Voluntary context switches: 30933 + Involuntary context switches: 7278 + Swaps: 0 + File system inputs: 40 + File system outputs: 5542200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash --canonical" + User time (seconds): 51.21 + System time (seconds): 9.99 + Percent of CPU this job got: 271% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:22.51 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3949252 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2478129 + Voluntary context switches: 28610 + 
Involuntary context switches: 7175 + Swaps: 0 + File system inputs: 16 + File system outputs: 11324232 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash --canonical" + User time (seconds): 219.20 + System time (seconds): 28.68 + Percent of CPU this job got: 318% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:17.85 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9329796 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 8203903 + Voluntary context switches: 33276 + Involuntary context switches: 9537 + Swaps: 0 + File system inputs: 96 + File system outputs: 28147704 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.canon.sshash --canonical" + User time (seconds): 2743.71 + System time (seconds): 426.47 + Percent of CPU this job got: 324% + Elapsed (wall clock) time (h:mm:ss or m:ss): 16:17.22 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 65191296 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 59691404 + Voluntary context switches: 693079 + Involuntary context switches: 39249 + 
Swaps: 0 + File system inputs: 1816 + File system outputs: 288401280 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash --canonical" + User time (seconds): 711.43 + System time (seconds): 83.04 + Percent of CPU this job got: 282% + Elapsed (wall clock) time (h:mm:ss or m:ss): 4:40.83 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 21211816 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 21682332 + Voluntary context switches: 105268 + Involuntary context switches: 14569 + Swaps: 0 + File system inputs: 680 + File system outputs: 93530032 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.canon.sshash --canonical" + User time (seconds): 293.44 + System time (seconds): 42.07 + Percent of CPU this job got: 338% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:39.01 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 17326080 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 11325424 + Voluntary context switches: 31588 + Involuntary context switches: 9801 + Swaps: 0 + File system inputs: 168 + File system 
outputs: 25868144 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash --canonical" + User time (seconds): 165.11 + System time (seconds): 21.03 + Percent of CPU this job got: 287% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:04.80 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9888600 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6330149 + Voluntary context switches: 28543 + Involuntary context switches: 8381 + Swaps: 0 + File system inputs: 120 + File system outputs: 19280272 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash --canonical" + User time (seconds): 15.98 + System time (seconds): 4.27 + Percent of CPU this job got: 227% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.92 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1359956 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 898110 + Voluntary context switches: 29814 + Involuntary context switches: 6043 + Swaps: 0 + File system inputs: 32 + File system outputs: 4013544 + Socket messages sent: 0 + Socket 
messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.canon.sshash --canonical" + User time (seconds): 820.66 + System time (seconds): 120.42 + Percent of CPU this job got: 243% + Elapsed (wall clock) time (h:mm:ss or m:ss): 6:26.56 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 30151716 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 25651988 + Voluntary context switches: 310229 + Involuntary context switches: 19290 + Swaps: 0 + File system inputs: 584 + File system outputs: 171442344 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.json b/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.json new file mode 100644 index 0000000..272aaaa --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "97972416", "num_positive_kmers": "67275966", "num_negative_kmers": "30696450", "num_invalid_kmers": "0", "num_searches": "34801398", "num_extensions": "32474568", "elapsed_millisec": "6369"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "461383839", "num_positive_kmers": "293470517", "num_negative_kmers": 
"167902332", "num_invalid_kmers": "10990", "num_searches": "149919071", "num_extensions": "143551446", "elapsed_millisec": "31852"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "406529529", "num_negative_kmers": "70615167", "num_invalid_kmers": "673778", "num_searches": "219935013", "num_extensions": "186594516", "elapsed_millisec": "70259"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz", "num_kmers": "10330949", "num_positive_kmers": "10230224", "num_negative_kmers": "99451", "num_invalid_kmers": "1274", "num_searches": "5430984", "num_extensions": "4799240", "elapsed_millisec": "797"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz", "num_kmers": "541466405", "num_positive_kmers": "507202856", "num_negative_kmers": "34238416", "num_invalid_kmers": "25133", "num_searches": "319174875", "num_extensions": "188027981", "elapsed_millisec": "312834"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "434532302", "num_negative_kmers": "42612394", "num_invalid_kmers": "673778", "num_searches": "238897243", "num_extensions": "195635059", "elapsed_millisec": "89038"} diff --git a/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.log b/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.log new file mode 100644 index 0000000..e019f29 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/canon-streaming-queries-high-hit.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.canon.sshash -q 
/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2025-11-11 21:19:51: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-11 21:19:57: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 67275966 (68.6683%) +num_negative_kmers = 30696450 (31.3317%) +num_invalid_kmers = 0 (0%) +num_searches = 34801398/67275966 (51.7293%) +num_extensions = 32474568/67275966 (48.2707%) +elapsed = 6.369 sec / 0.10615 min / 65.0081 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2025-11-11 21:19:58: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-11 21:20:29: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 293470517 (63.6066%) +num_negative_kmers = 167902332 (36.391%) +num_invalid_kmers = 10990 (0.00238196%) +num_searches = 149919071/293470517 (51.0849%) +num_extensions = 143551446/293470517 (48.9151%) +elapsed = 31.852 sec / 0.530867 min / 69.0358 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:20:30: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-11 21:21:40: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 406529529 (85.0803%) +num_negative_kmers = 70615167 (14.7787%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 219935013/406529529 (54.1006%) +num_extensions = 186594516/406529529 (45.8994%) +elapsed = 70.259 sec / 1.17098 min / 147.041 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz +2025-11-11 21:21:41: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... 
+2025-11-11 21:21:41: DONE +==== query report: +num_kmers = 10330949 +num_positive_kmers = 10230224 (99.025%) +num_negative_kmers = 99451 (0.962651%) +num_invalid_kmers = 1274 (0.0123319%) +num_searches = 5430984/10230224 (53.0876%) +num_extensions = 4799240/10230224 (46.9124%) +elapsed = 0.797 sec / 0.0132833 min / 77.1468 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz +2025-11-11 21:21:42: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-11 21:26:55: DONE +==== query report: +num_kmers = 541466405 +num_positive_kmers = 507202856 (93.6721%) +num_negative_kmers = 34238416 (6.32328%) +num_invalid_kmers = 25133 (0.00464165%) +num_searches = 319174875/507202856 (62.9284%) +num_extensions = 188027981/507202856 (37.0716%) +elapsed = 312.834 sec / 5.2139 min / 577.753 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.canon.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:26:57: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-11 21:28:26: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 434532302 (90.9409%) +num_negative_kmers = 42612394 (8.91811%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 238897243/434532302 (54.978%) +num_extensions = 195635059/434532302 (45.022%) +elapsed = 89.038 sec / 1.48397 min / 186.343 ns/kmer diff --git a/benchmarks/results-10-11-25/k63/regular-bench.json b/benchmarks/results-10-11-25/k63/regular-bench.json new file mode 100644 index 0000000..cf762d3 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-bench.json @@ -0,0 +1,27 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "k": "63", "m": "24", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "691.263021", "negative lookup (avg_nanosec_per_kmer)": "711.377981", "access (avg_nanosec_per_kmer)": "290.806842", "iterator (avg_nanosec_per_kmer)": "2.930255"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "k": "63", "m": "24", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "680.682504", "negative lookup (avg_nanosec_per_kmer)": "708.874007", "access (avg_nanosec_per_kmer)": "292.682575", "iterator (avg_nanosec_per_kmer)": "2.922057"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "k": "63", "m": "24", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "695.028987", "negative lookup (avg_nanosec_per_kmer)": "709.296994", "access (avg_nanosec_per_kmer)": "292.970623", "iterator (avg_nanosec_per_kmer)": "2.933936"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "k": "63", "m": "24", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "665.363182", "negative lookup (avg_nanosec_per_kmer)": "782.044145", "access (avg_nanosec_per_kmer)": "327.483650", "iterator (avg_nanosec_per_kmer)": "2.913678"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "k": "63", "m": "24", "canonical": 
"false", "positive lookup (avg_nanosec_per_kmer)": "662.942952", "negative lookup (avg_nanosec_per_kmer)": "778.503150", "access (avg_nanosec_per_kmer)": "330.263870", "iterator (avg_nanosec_per_kmer)": "2.914674"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "k": "63", "m": "24", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "680.109998", "negative lookup (avg_nanosec_per_kmer)": "783.560782", "access (avg_nanosec_per_kmer)": "327.780588", "iterator (avg_nanosec_per_kmer)": "2.914947"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "870.332395", "negative lookup (avg_nanosec_per_kmer)": "864.239819", "access (avg_nanosec_per_kmer)": "360.617593", "iterator (avg_nanosec_per_kmer)": "2.904160"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "870.687183", "negative lookup (avg_nanosec_per_kmer)": "860.732890", "access (avg_nanosec_per_kmer)": "357.900472", "iterator (avg_nanosec_per_kmer)": "2.908669"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "880.858612", "negative lookup (avg_nanosec_per_kmer)": "855.405769", "access (avg_nanosec_per_kmer)": "360.504765", "iterator (avg_nanosec_per_kmer)": "2.948144"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1540.573845", "negative lookup (avg_nanosec_per_kmer)": "1155.274645", "access (avg_nanosec_per_kmer)": "759.041335", "iterator (avg_nanosec_per_kmer)": "2.931861"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup 
(avg_nanosec_per_kmer)": "1544.192421", "negative lookup (avg_nanosec_per_kmer)": "1149.791899", "access (avg_nanosec_per_kmer)": "751.690975", "iterator (avg_nanosec_per_kmer)": "2.937336"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1541.785496", "negative lookup (avg_nanosec_per_kmer)": "1157.108125", "access (avg_nanosec_per_kmer)": "754.569936", "iterator (avg_nanosec_per_kmer)": "3.030462"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1313.985264", "negative lookup (avg_nanosec_per_kmer)": "978.958743", "access (avg_nanosec_per_kmer)": "648.604746", "iterator (avg_nanosec_per_kmer)": "3.129373"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1322.750639", "negative lookup (avg_nanosec_per_kmer)": "974.624480", "access (avg_nanosec_per_kmer)": "647.356619", "iterator (avg_nanosec_per_kmer)": "3.042660"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1310.681843", "negative lookup (avg_nanosec_per_kmer)": "981.352905", "access (avg_nanosec_per_kmer)": "638.633135", "iterator (avg_nanosec_per_kmer)": "3.030584"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1828.775354", "negative lookup (avg_nanosec_per_kmer)": "860.880335", "access (avg_nanosec_per_kmer)": "458.102701", "iterator (avg_nanosec_per_kmer)": "3.023876"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1820.777033", 
"negative lookup (avg_nanosec_per_kmer)": "865.646887", "access (avg_nanosec_per_kmer)": "457.872156", "iterator (avg_nanosec_per_kmer)": "3.076924"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1815.061634", "negative lookup (avg_nanosec_per_kmer)": "862.646866", "access (avg_nanosec_per_kmer)": "459.945051", "iterator (avg_nanosec_per_kmer)": "3.017141"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1550.438345", "negative lookup (avg_nanosec_per_kmer)": "855.393987", "access (avg_nanosec_per_kmer)": "403.945717", "iterator (avg_nanosec_per_kmer)": "2.962174"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1537.682593", "negative lookup (avg_nanosec_per_kmer)": "853.390459", "access (avg_nanosec_per_kmer)": "408.068537", "iterator (avg_nanosec_per_kmer)": "2.963759"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "k": "63", "m": "31", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1543.086386", "negative lookup (avg_nanosec_per_kmer)": "843.749043", "access (avg_nanosec_per_kmer)": "406.675799", "iterator (avg_nanosec_per_kmer)": "2.998486"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "598.821670", "negative lookup (avg_nanosec_per_kmer)": "693.448774", "access (avg_nanosec_per_kmer)": "284.324825", "iterator (avg_nanosec_per_kmer)": "2.926867"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "618.440707", "negative lookup (avg_nanosec_per_kmer)": 
"687.975533", "access (avg_nanosec_per_kmer)": "281.168760", "iterator (avg_nanosec_per_kmer)": "2.930572"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "k": "63", "m": "23", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "611.720160", "negative lookup (avg_nanosec_per_kmer)": "703.158131", "access (avg_nanosec_per_kmer)": "282.141886", "iterator (avg_nanosec_per_kmer)": "2.920330"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1078.331165", "negative lookup (avg_nanosec_per_kmer)": "1058.107574", "access (avg_nanosec_per_kmer)": "590.484979", "iterator (avg_nanosec_per_kmer)": "2.920408"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1081.961138", "negative lookup (avg_nanosec_per_kmer)": "1039.836896", "access (avg_nanosec_per_kmer)": "580.860108", "iterator (avg_nanosec_per_kmer)": "2.916847"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash", "k": "63", "m": "25", "canonical": "false", "positive lookup (avg_nanosec_per_kmer)": "1089.778363", "negative lookup (avg_nanosec_per_kmer)": "1042.536515", "access (avg_nanosec_per_kmer)": "583.649722", "iterator (avg_nanosec_per_kmer)": "2.930816"} diff --git a/benchmarks/results-10-11-25/k63/regular-bench.log b/benchmarks/results-10-11-25/k63/regular-bench.log new file mode 100644 index 0000000..56c23f4 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-bench.log @@ -0,0 +1,135 @@ +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 691.263 +negative lookup (avg_nanosec_per_kmer) 711.378 +access (avg_nanosec_per_kmer) = 290.807 +iterator (avg_nanosec_per_kmer) = 2.93025 +./sshash bench -i 
/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 680.683 +negative lookup (avg_nanosec_per_kmer) 708.874 +access (avg_nanosec_per_kmer) = 292.683 +iterator (avg_nanosec_per_kmer) = 2.92206 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 695.029 +negative lookup (avg_nanosec_per_kmer) 709.297 +access (avg_nanosec_per_kmer) = 292.971 +iterator (avg_nanosec_per_kmer) = 2.93394 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 665.363 +negative lookup (avg_nanosec_per_kmer) 782.044 +access (avg_nanosec_per_kmer) = 327.484 +iterator (avg_nanosec_per_kmer) = 2.91368 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 662.943 +negative lookup (avg_nanosec_per_kmer) 778.503 +access (avg_nanosec_per_kmer) = 330.264 +iterator (avg_nanosec_per_kmer) = 2.91467 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 680.11 +negative lookup (avg_nanosec_per_kmer) 783.561 +access (avg_nanosec_per_kmer) = 327.781 +iterator (avg_nanosec_per_kmer) = 2.91495 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 870.332 +negative lookup (avg_nanosec_per_kmer) 864.24 +access (avg_nanosec_per_kmer) = 360.618 +iterator (avg_nanosec_per_kmer) = 2.90416 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 870.687 +negative lookup (avg_nanosec_per_kmer) 860.733 +access (avg_nanosec_per_kmer) = 357.9 +iterator (avg_nanosec_per_kmer) = 2.90867 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 880.859 +negative lookup (avg_nanosec_per_kmer) 855.406 +access (avg_nanosec_per_kmer) = 360.505 +iterator (avg_nanosec_per_kmer) = 
2.94814 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1540.57 +negative lookup (avg_nanosec_per_kmer) 1155.27 +access (avg_nanosec_per_kmer) = 759.041 +iterator (avg_nanosec_per_kmer) = 2.93186 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1544.19 +negative lookup (avg_nanosec_per_kmer) 1149.79 +access (avg_nanosec_per_kmer) = 751.691 +iterator (avg_nanosec_per_kmer) = 2.93734 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1541.79 +negative lookup (avg_nanosec_per_kmer) 1157.11 +access (avg_nanosec_per_kmer) = 754.57 +iterator (avg_nanosec_per_kmer) = 3.03046 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1313.99 +negative lookup (avg_nanosec_per_kmer) 978.959 +access (avg_nanosec_per_kmer) = 648.605 +iterator (avg_nanosec_per_kmer) = 3.12937 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1322.75 +negative lookup (avg_nanosec_per_kmer) 974.624 +access (avg_nanosec_per_kmer) = 647.357 +iterator (avg_nanosec_per_kmer) = 3.04266 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1310.68 +negative lookup (avg_nanosec_per_kmer) 981.353 +access (avg_nanosec_per_kmer) = 638.633 +iterator (avg_nanosec_per_kmer) = 3.03058 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1828.78 +negative lookup (avg_nanosec_per_kmer) 860.88 +access (avg_nanosec_per_kmer) = 458.103 +iterator (avg_nanosec_per_kmer) = 3.02388 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1820.78 +negative lookup (avg_nanosec_per_kmer) 865.647 +access (avg_nanosec_per_kmer) = 457.872 +iterator 
(avg_nanosec_per_kmer) = 3.07692 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1815.06 +negative lookup (avg_nanosec_per_kmer) 862.647 +access (avg_nanosec_per_kmer) = 459.945 +iterator (avg_nanosec_per_kmer) = 3.01714 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1550.44 +negative lookup (avg_nanosec_per_kmer) 855.394 +access (avg_nanosec_per_kmer) = 403.946 +iterator (avg_nanosec_per_kmer) = 2.96217 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1537.68 +negative lookup (avg_nanosec_per_kmer) 853.39 +access (avg_nanosec_per_kmer) = 408.069 +iterator (avg_nanosec_per_kmer) = 2.96376 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1543.09 +negative lookup (avg_nanosec_per_kmer) 843.749 +access (avg_nanosec_per_kmer) = 406.676 +iterator (avg_nanosec_per_kmer) = 2.99849 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 598.822 +negative lookup (avg_nanosec_per_kmer) 693.449 +access (avg_nanosec_per_kmer) = 284.325 +iterator (avg_nanosec_per_kmer) = 2.92687 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 618.441 +negative lookup (avg_nanosec_per_kmer) 687.976 +access (avg_nanosec_per_kmer) = 281.169 +iterator (avg_nanosec_per_kmer) = 2.93057 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 611.72 +negative lookup (avg_nanosec_per_kmer) 703.158 +access (avg_nanosec_per_kmer) = 282.142 +iterator (avg_nanosec_per_kmer) = 2.92033 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1078.33 +negative lookup (avg_nanosec_per_kmer) 1058.11 +access 
(avg_nanosec_per_kmer) = 590.485 +iterator (avg_nanosec_per_kmer) = 2.92041 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1081.96 +negative lookup (avg_nanosec_per_kmer) 1039.84 +access (avg_nanosec_per_kmer) = 580.86 +iterator (avg_nanosec_per_kmer) = 2.91685 +./sshash bench -i /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash +positive lookup (avg_nanosec_per_kmer) = 1089.78 +negative lookup (avg_nanosec_per_kmer) 1042.54 +access (avg_nanosec_per_kmer) = 583.65 +iterator (avg_nanosec_per_kmer) = 2.93082 diff --git a/benchmarks/results-10-11-25/k63/regular-build.json b/benchmarks/results-10-11-25/k63/regular-build.json new file mode 100644 index 0000000..53b0261 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-build.json @@ -0,0 +1,9 @@ +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz", "k": "63", "m": "24", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1550501", "step 2 (compute minimizer tuples)": "341181", "step 3 (merging minimizer tuples)": "1481165", "step 4 (build mphf)": "1324126", "step 5 (replacing minimizer values with MPHF hashes)": "1072533", "step 6 (merging minimizers tuples)": "1907390", "step 7.1 (build sparse index)": "374789", "step 7.2 (build skew index)": "4746187", "step 7 (build sparse and skew index)": "5188720", "total_build_time_in_microsec": "12865616", "index_size_in_bytes": "308878216", "num_kmers": "556585658"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz", "k": "63", "m": "24", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "2834930", "step 2 (compute minimizer tuples)": "499707", "step 3 (merging minimizer tuples)": "2823341", "step 4 (build mphf)": "2465617", "step 5 (replacing minimizer values with MPHF hashes)": "2044225", "step 6 (merging minimizers tuples)": "4062124", "step 7.1 (build sparse 
index)": "453285", "step 7.2 (build skew index)": "710322", "step 7 (build sparse and skew index)": "1277284", "total_build_time_in_microsec": "16007228", "index_size_in_bytes": "551471063", "num_kmers": "1155250667"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "7287587", "step 2 (compute minimizer tuples)": "1032319", "step 3 (merging minimizer tuples)": "7045959", "step 4 (build mphf)": "4760749", "step 5 (replacing minimizer values with MPHF hashes)": "4838910", "step 6 (merging minimizers tuples)": "10719832", "step 7.1 (build sparse index)": "1615531", "step 7.2 (build skew index)": "17332459", "step 7 (build sparse and skew index)": "19253047", "total_build_time_in_microsec": "54938403", "index_size_in_bytes": "1647878160", "num_kmers": "2771316093"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "70905506", "step 2 (compute minimizer tuples)": "8815426", "step 3 (merging minimizer tuples)": "82707396", "step 4 (build mphf)": "32525569", "step 5 (replacing minimizer values with MPHF hashes)": "37310602", "step 6 (merging minimizers tuples)": "169296218", "step 7.1 (build sparse index)": "21324927", "step 7.2 (build skew index)": "242979594", "step 7 (build sparse and skew index)": "267119303", "total_build_time_in_microsec": "668680020", "index_size_in_bytes": "15531396563", "num_kmers": "22766770240"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "27817896", "step 2 (compute minimizer tuples)": "6626512", "step 3 (merging minimizer tuples)": "22501102", "step 4 (build mphf)": "11049447", "step 5 (replacing minimizer values with MPHF hashes)": 
"13554157", "step 6 (merging minimizers tuples)": "49497804", "step 7.1 (build sparse index)": "9032404", "step 7.2 (build skew index)": "59270230", "step 7 (build sparse and skew index)": "69359825", "total_build_time_in_microsec": "200406743", "index_size_in_bytes": "5431161974", "num_kmers": "5926785469"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "10327733", "step 2 (compute minimizer tuples)": "979513", "step 3 (merging minimizer tuples)": "7740073", "step 4 (build mphf)": "3334139", "step 5 (replacing minimizer values with MPHF hashes)": "4471903", "step 6 (merging minimizers tuples)": "10551280", "step 7.1 (build sparse index)": "3218494", "step 7.2 (build skew index)": "19651337", "step 7 (build sparse and skew index)": "23244662", "total_build_time_in_microsec": "60649303", "index_size_in_bytes": "1824281560", "num_kmers": "2027656011"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz", "k": "63", "m": "31", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "7858559", "step 2 (compute minimizer tuples)": "764585", "step 3 (merging minimizer tuples)": "5748183", "step 4 (build mphf)": "2753717", "step 5 (replacing minimizer values with MPHF hashes)": "3392272", "step 6 (merging minimizers tuples)": "7612061", "step 7.1 (build sparse index)": "2395701", "step 7.2 (build skew index)": "13405634", "step 7 (build sparse and skew index)": "16076369", "total_build_time_in_microsec": "44205746", "index_size_in_bytes": "1294767218", "num_kmers": "1524904156"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz", "k": "63", "m": "23", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "1176209", "step 2 (compute minimizer tuples)": "321286", "step 3 (merging minimizer tuples)": "1068420", "step 4 (build mphf)": 
"1067419", "step 5 (replacing minimizer values with MPHF hashes)": "755179", "step 6 (merging minimizers tuples)": "1127851", "step 7.1 (build sparse index)": "273848", "step 7.2 (build skew index)": "39049", "step 7 (build sparse and skew index)": "363897", "total_build_time_in_microsec": "5880261", "index_size_in_bytes": "209086404", "num_kmers": "412515880"} +{"input_filename": "/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz", "k": "63", "m": "25", "canonical": "false", "seed": "1", "num_threads": "64", "step 1 (encode strings)": "38001608", "step 2 (compute minimizer tuples)": "7701882", "step 3 (merging minimizer tuples)": "77158661", "step 4 (build mphf)": "23107746", "step 5 (replacing minimizer values with MPHF hashes)": "23805657", "step 6 (merging minimizers tuples)": "73107007", "step 7.1 (build sparse index)": "9974866", "step 7.2 (build skew index)": "8571477", "step 7 (build sparse and skew index)": "20088686", "total_build_time_in_microsec": "262971247", "index_size_in_bytes": "7528610111", "num_kmers": "13663610341"} diff --git a/benchmarks/results-10-11-25/k63/regular-build.log b/benchmarks/results-10-11-25/k63/regular-build.log new file mode 100644 index 0000000..bb53aa5 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-build.log @@ -0,0 +1,1975 @@ +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash +2025-11-11 00:52:41: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... 
+read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +cost: 2.0 + 0.212662 [bits/kmer] +max string length = 46783 +num bits per_absolute_offset = 30 +num bits per_relative_offset = 16 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.5505 [sec] (2.78574 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.15.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.34.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.53.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.341181 [sec] (0.612989 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.48117 [sec] (2.66116 [ns/kmer]) +num_minimizers = 24306363 +num_minimizer_positions = 28148872 +num_super_kmers = 28148872 +building minimizers MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 1.32413 [sec] (2.37902 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818761289542632.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 1.07253 [sec] (1.92699 [ns/kmer]) +=== step 6 (merging minimizers tuples): 1.90739 [sec] (3.42695 [ns/kmer]) +num_bits_per_offset = 30 +num_buckets_larger_than_1_not_in_skew_index 957832/24306363 (3.94066%) +num_buckets_in_skew_index 3271/24306363 (0.0134574%) +max_bucket_size 414759 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 3653173/28148872 (12.978%) +num_minimizer_positions_of_buckets_in_skew_index 1150439/28148872 (4.08698%) +=== step 7.1 (build sparse index): 0.374789 [sec] (0.673372 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 3558021 + partition = 1: num kmers in buckets of size > 128 and <= 256: 2129998 + partition = 2: num kmers in buckets of size > 256 and <= 512: 1247229 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 1130328 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 1022110 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 790746 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 666283 + partition = 7: num kmers in buckets of size > 8192 and <= 414759: 3762004 +num kmers in skew index = 14306719 (2.57044%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 3558021 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[0] for 3558021 kmers; bits/key = 3.06724 + built positions[0] for 3558021 kmers; bits/key = 7.0001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2129998 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2129998)... + built mphs[1] for 2129998 kmers; bits/key = 2.56007 + built positions[1] for 2129998 kmers; bits/key = 8.00016 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1247229 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1247229)... 
+ built mphs[2] for 1247229 kmers; bits/key = 2.56071 + built positions[2] for 1247229 kmers; bits/key = 9.00028 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1130328 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1130328)... + built mphs[3] for 1130328 kmers; bits/key = 2.56088 + built positions[3] for 1130328 kmers; bits/key = 10.0003 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1022110 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1022110)... + built mphs[4] for 1022110 kmers; bits/key = 2.41826 + built positions[4] for 1022110 kmers; bits/key = 11.0004 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 790746 + building MPHF with 64 threads and 1 partitions (avg. partition size = 790746)... + built mphs[5] for 790746 kmers; bits/key = 2.4189 + built positions[5] for 790746 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 666283 + building MPHF with 64 threads and 1 partitions (avg. partition size = 666283)... + built mphs[6] for 666283 kmers; bits/key = 2.5622 + built positions[6] for 666283 kmers; bits/key = 13.0005 + lower = 8192; upper = 414759; num_bits_per_pos = 19; num_kmers_in_partition = 3762004 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 3762004 kmers; bits/key = 2.9235 + built positions[7] for 3762004 kmers; bits/key = 19.0001 +=== step 7.2 (build skew index): 4.74619 [sec] (8.52733 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 96.0459% +buckets with 2 minimizer positions = 2.25565% +buckets with 3 minimizer positions = 0.699874% +buckets with 4 minimizer positions = 0.315284% +buckets with 5 minimizer positions = 0.173597% +buckets with 6 minimizer positions = 0.108305% +buckets with 7 minimizer positions = 0.0716932% +buckets with 8 minimizer positions = 0.0520522% +buckets with 9 minimizer positions = 0.0388458% +buckets with 10 minimizer positions = 0.0301485% +buckets with 11 minimizer positions = 0.0240019% +buckets with 12 minimizer positions = 0.0204144% +buckets with 13 minimizer positions = 0.0165265% +buckets with 14 minimizer positions = 0.013984% +buckets with 15 minimizer positions = 0.0120051% +buckets with 16 minimizer positions = 0.00972996% +max_bucket_size = 414759 +=== step 7 (build sparse and skew index): 5.18872 [sec] (9.32241 [ns/kmer]) +=== total time: 12.8656 [sec] (23.1152 [ns/kmer]) +total index size: 308878216 [B] -- 308.878 [MB] +SPACE BREAKDOWN: + mphf: 0.130318 [bits/kmer] (2.98412 [bits/key]) -- 2.93534% + strings_offsets: 0.115799 [bits/kmer] -- 2.60831% + control_codewords: 1.35379 [bits/kmer] -- 30.4933% + mid_load_buckets: 0.196907 [bits/kmer] -- 4.43522% + begin_buckets_of_size: 3.85206e-06 [bits/kmer] -- 8.67656e-05% + strings: 2.21266 [bits/kmer] -- 49.8391% + skew_index: 0.430136 [bits/kmer] -- 9.68858% + weights: 2.6447e-06 [bits/kmer] -- 5.95704e-05% + -------------- + total: 4.43961 [bits/kmer] +2025-11-11 00:52:54: saving data structure to disk... 
+2025-11-11 00:52:55: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash +2025-11-11 00:52:55: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +cost: 2.0 + 0.0167212 [bits/kmer] +max string length = 261876 +num bits per_absolute_offset = 31 +num bits per_relative_offset = 18 +num bits per_string_id = 18 +=== step 1 (encode strings): 2.83493 [sec] (2.45395 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.12.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.31.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.50.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.499707 [sec] (0.432553 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.bin' +=== step 3 (merging minimizer tuples): 2.82334 [sec] (2.44392 [ns/kmer]) +num_minimizers = 55464592 +num_minimizer_positions = 56006004 +num_super_kmers = 56006004 +building minimizers MPHF with 64 threads and 19 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.46562 [sec] (2.13427 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818775616846859.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 2.04422 [sec] (1.76951 [ns/kmer]) +=== step 6 (merging minimizers tuples): 4.06212 [sec] (3.51623 [ns/kmer]) +num_bits_per_offset = 31 +num_buckets_larger_than_1_not_in_skew_index 208120/55464592 (0.37523%) +num_buckets_in_skew_index 490/55464592 (0.000883447%) +max_bucket_size 794 +log2_max_bucket_size 10 +num_partitions in skew index 4 +num_minimizer_positions_of_buckets_larger_than_1 687113/56006004 (1.22686%) +num_minimizer_positions_of_buckets_in_skew_index 62909/56006004 (0.112325%) +=== step 7.1 (build sparse index): 0.453285 [sec] (0.392369 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 604884 + partition = 1: num kmers in buckets of size > 128 and <= 256: 417805 + partition = 2: num kmers in buckets of size > 256 and <= 512: 221219 + partition = 3: num kmers in buckets of size > 512 and <= 794: 50229 +num kmers in skew index = 1294137 (0.112022%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 604884 + building MPHF with 64 threads and 1 partitions (avg. partition size = 604884)... + built mphs[0] for 604884 kmers; bits/key = 2.41964 + built positions[0] for 604884 kmers; bits/key = 7.00061 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 417805 + building MPHF with 64 threads and 1 partitions (avg. partition size = 417805)... + built mphs[1] for 417805 kmers; bits/key = 2.42142 + built positions[1] for 417805 kmers; bits/key = 8.00082 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 221219 + building MPHF with 64 threads and 1 partitions (avg. partition size = 221219)... + built mphs[2] for 221219 kmers; bits/key = 2.42569 + built positions[2] for 221219 kmers; bits/key = 9.00147 + lower = 512; upper = 794; num_bits_per_pos = 10; num_kmers_in_partition = 50229 + building MPHF with 64 threads and 1 partitions (avg. partition size = 50229)... 
+ built mphs[3] for 50229 kmers; bits/key = 2.45946 + built positions[3] for 50229 kmers; bits/key = 10.0073 +=== step 7.2 (build skew index): 0.710322 [sec] (0.614864 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 99.6239% +buckets with 2 minimizer positions = 0.253291% +buckets with 3 minimizer positions = 0.0530717% +buckets with 4 minimizer positions = 0.0227569% +buckets with 5 minimizer positions = 0.012325% +buckets with 6 minimizer positions = 0.00748405% +buckets with 7 minimizer positions = 0.00509695% +buckets with 8 minimizer positions = 0.00359148% +buckets with 9 minimizer positions = 0.00270803% +buckets with 10 minimizer positions = 0.0021383% +buckets with 11 minimizer positions = 0.00159201% +buckets with 12 minimizer positions = 0.00125666% +buckets with 13 minimizer positions = 0.00109619% +buckets with 14 minimizer positions = 0.000941141% +buckets with 15 minimizer positions = 0.000795102% +buckets with 16 minimizer positions = 0.000685122% +max_bucket_size = 794 +=== step 7 (build sparse and skew index): 1.27728 [sec] (1.10563 [ns/kmer]) +=== total time: 16.0072 [sec] (13.8561 [ns/kmer]) +total index size: 551471063 [B] -- 551.471 [MB] +SPACE BREAKDOWN: + mphf: 0.138428 [bits/kmer] (2.88327 [bits/key]) -- 3.62483% + strings_offsets: 0.0958242 [bits/kmer] -- 2.50922% + control_codewords: 1.53635 [bits/kmer] -- 40.2303% + mid_load_buckets: 0.0184383 [bits/kmer] -- 0.482819% + begin_buckets_of_size: 1.85587e-06 [bits/kmer] -- 4.85973e-05% + strings: 2.01672 [bits/kmer] -- 52.8092% + skew_index: 0.0131204 [bits/kmer] -- 0.343567% + weights: 1.27418e-06 [bits/kmer] -- 3.33653e-05% + -------------- + total: 3.81888 [bits/kmer] +2025-11-11 00:53:11: saving data structure to disk... 
+2025-11-11 00:53:13: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash +2025-11-11 00:53:14: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... +read 1000000 sequences, 1159508892 bases, 1097508892 kmers +read 2000000 sequences, 2171598594 bases, 2047598594 kmers +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +cost: 2.0 + 0.118255 [bits/kmer] +max string length = 124282 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 17 +num bits per_string_id = 22 +=== step 1 (encode strings): 7.28759 [sec] (2.62965 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 1.03232 [sec] (0.372501 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.04596 [sec] (2.54246 [ns/kmer]) +num_minimizers = 122838669 +num_minimizer_positions = 140756047 +num_super_kmers = 140756047 +building minimizers MPHF with 64 threads and 41 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 4.76075 [sec] (1.71787 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818794035679118.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 4.83891 [sec] (1.74607 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.7198 [sec] (3.86814 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 3097190/122838669 (2.52135%) +num_buckets_in_skew_index 28203/122838669 (0.0229594%) +max_bucket_size 147936 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 12724460/140756047 (9.04008%) +num_minimizer_positions_of_buckets_in_skew_index 8318311/140756047 (5.90974%) +=== step 7.1 (build sparse index): 1.61553 [sec] (0.582947 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 25196923 + partition = 1: num kmers in buckets of size > 128 and <= 256: 21919654 + partition = 2: num kmers in buckets of size > 256 and <= 512: 19634878 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 18051454 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 17018125 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 14085569 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 9296403 + partition = 7: num kmers in buckets of size > 8192 and <= 147936: 20255122 +num kmers in skew index = 145458128 (5.2487%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25196923 + building MPHF with 64 threads and 9 partitions (avg. partition size = 3000000)... + built mphs[0] for 25196923 kmers; bits/key = 2.56001 + built positions[0] for 25196923 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 21919654 + building MPHF with 64 threads and 8 partitions (avg. partition size = 3000000)... 
+ built mphs[1] for 21919654 kmers; bits/key = 2.60671 + built positions[1] for 21919654 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 19634878 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[2] for 19634878 kmers; bits/key = 2.55594 + built positions[2] for 19634878 kmers; bits/key = 9.00002 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 18051454 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... + built mphs[3] for 18051454 kmers; bits/key = 2.60116 + built positions[3] for 18051454 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17018125 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[4] for 17018125 kmers; bits/key = 2.58264 + built positions[4] for 17018125 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14085569 + building MPHF with 64 threads and 5 partitions (avg. partition size = 3000000)... + built mphs[5] for 14085569 kmers; bits/key = 2.54674 + built positions[5] for 14085569 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 9296403 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[6] for 9296403 kmers; bits/key = 2.72206 + built positions[6] for 9296403 kmers; bits/key = 13 + lower = 8192; upper = 147936; num_bits_per_pos = 18; num_kmers_in_partition = 20255122 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 20255122 kmers; bits/key = 2.57505 + built positions[7] for 20255122 kmers; bits/key = 18 +=== step 7.2 (build skew index): 17.3325 [sec] (6.25423 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 97.4557% +buckets with 2 minimizer positions = 1.46862% +buckets with 3 minimizer positions = 0.405135% +buckets with 4 minimizer positions = 0.185188% +buckets with 5 minimizer positions = 0.10523% +buckets with 6 minimizer positions = 0.0680771% +buckets with 7 minimizer positions = 0.0480598% +buckets with 8 minimizer positions = 0.0352926% +buckets with 9 minimizer positions = 0.0271502% +buckets with 10 minimizer positions = 0.0217871% +buckets with 11 minimizer positions = 0.0176866% +buckets with 12 minimizer positions = 0.014607% +buckets with 13 minimizer positions = 0.0121411% +buckets with 14 minimizer positions = 0.0105985% +buckets with 15 minimizer positions = 0.00909811% +buckets with 16 minimizer positions = 0.00794457% +max_bucket_size = 147936 +=== step 7 (build sparse and skew index): 19.253 [sec] (6.94726 [ns/kmer]) +=== total time: 54.9384 [sec] (19.8239 [ns/kmer]) +total index size: 1647878160 [B] -- 1647.88 [MB] +SPACE BREAKDOWN: + mphf: 0.125211 [bits/kmer] (2.82485 [bits/key]) -- 2.63218% + strings_offsets: 0.11255 [bits/kmer] -- 2.36601% + control_codewords: 1.46273 [bits/kmer] -- 30.7492% + mid_load_buckets: 0.146928 [bits/kmer] -- 3.08869% + begin_buckets_of_size: 7.7364e-07 [bits/kmer] -- 1.62633e-05% + strings: 2.11826 [bits/kmer] -- 44.5296% + skew_index: 0.791283 [bits/kmer] -- 16.6342% + weights: 5.31156e-07 [bits/kmer] -- 1.11659e-05% + -------------- + total: 4.75695 [bits/kmer] +2025-11-11 00:54:08: saving data structure to disk... 
+2025-11-11 00:54:15: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash +2025-11-11 00:54:16: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz'... +read 1000000 sequences, 367776209 bases, 305776209 kmers +read 2000000 sequences, 737985819 bases, 613985819 kmers +read 3000000 sequences, 1110904173 bases, 924904173 kmers +read 4000000 sequences, 1483811415 bases, 1235811415 kmers +read 5000000 sequences, 1859655851 bases, 1549655851 kmers +read 6000000 sequences, 2238082976 bases, 1866082976 kmers +read 7000000 sequences, 2618741364 bases, 2184741364 kmers +read 8000000 sequences, 3003367751 bases, 2507367751 kmers +read 9000000 sequences, 3393382254 bases, 2835382254 kmers +read 10000000 sequences, 3787981427 bases, 3167981427 kmers +read 11000000 sequences, 4184097126 bases, 3502097126 kmers +read 12000000 sequences, 4584570246 bases, 3840570246 kmers +read 13000000 sequences, 4992446542 bases, 4186446542 kmers +read 14000000 sequences, 5404831631 bases, 4536831631 kmers +read 15000000 sequences, 5823371753 bases, 4893371753 kmers +read 16000000 sequences, 6248734465 bases, 5256734465 kmers +read 17000000 sequences, 6682521466 bases, 5628521466 kmers +read 18000000 sequences, 7124787217 bases, 6008787217 kmers +read 19000000 sequences, 7580114109 bases, 6402114109 kmers +read 20000000 sequences, 8047294111 bases, 6807294111 kmers +read 21000000 sequences, 8529676866 bases, 7227676866 kmers +read 22000000 sequences, 9033767795 bases, 7669767795 kmers +read 23000000 sequences, 9560185658 bases, 8134185658 kmers +read 24000000 sequences, 10119240080 bases, 8631240080 kmers +read 25000000 sequences, 10718751300 bases, 9168751300 kmers +read 26000000 sequences, 11378640529 bases, 9766640529 kmers +read 27000000 sequences, 12134212218 bases, 10460212218 kmers +read 
28000000 sequences, 13067071790 bases, 11331071790 kmers +read 29000000 sequences, 13626049624 bases, 11828049624 kmers +read 30000000 sequences, 13960720037 bases, 12100720037 kmers +read 31000000 sequences, 14294970673 bases, 12372970673 kmers +read 32000000 sequences, 14628836645 bases, 12644836645 kmers +read 33000000 sequences, 14963175436 bases, 12917175436 kmers +read 34000000 sequences, 15298218879 bases, 13190218879 kmers +read 35000000 sequences, 15633699282 bases, 13463699282 kmers +read 36000000 sequences, 15967703945 bases, 13735703945 kmers +read 37000000 sequences, 16302024026 bases, 14008024026 kmers +read 38000000 sequences, 16635791931 bases, 14279791931 kmers +read 39000000 sequences, 16971526422 bases, 14553526422 kmers +read 40000000 sequences, 17307281294 bases, 14827281294 kmers +read 41000000 sequences, 17645308371 bases, 15103308371 kmers +read 42000000 sequences, 17983412927 bases, 15379412927 kmers +read 43000000 sequences, 18320303922 bases, 15654303922 kmers +read 44000000 sequences, 18658298765 bases, 15930298765 kmers +read 45000000 sequences, 18996960738 bases, 16206960738 kmers +read 46000000 sequences, 19337650486 bases, 16485650486 kmers +read 47000000 sequences, 19678229737 bases, 16764229737 kmers +read 48000000 sequences, 20021474847 bases, 17045474847 kmers +read 49000000 sequences, 20364403409 bases, 17326403409 kmers +read 50000000 sequences, 20708621449 bases, 17608621449 kmers +read 51000000 sequences, 21051850139 bases, 17889850139 kmers +read 52000000 sequences, 21397390032 bases, 18173390032 kmers +read 53000000 sequences, 21743414843 bases, 18457414843 kmers +read 54000000 sequences, 22090330124 bases, 18742330124 kmers +read 55000000 sequences, 22439410639 bases, 19029410639 kmers +read 56000000 sequences, 22787640700 bases, 19315640700 kmers +read 57000000 sequences, 23137101573 bases, 19603101573 kmers +read 58000000 sequences, 23487411623 bases, 19891411623 kmers +read 59000000 sequences, 23839869821 bases, 
20181869821 kmers +read 60000000 sequences, 24191291613 bases, 20471291613 kmers +read 61000000 sequences, 24545366070 bases, 20763366070 kmers +read 62000000 sequences, 24900738859 bases, 21056738859 kmers +read 63000000 sequences, 25257104822 bases, 21351104822 kmers +read 64000000 sequences, 25617509594 bases, 21649509594 kmers +read 65000000 sequences, 25977036607 bases, 21947036607 kmers +read 66000000 sequences, 26338121105 bases, 22246121105 kmers +read 67000000 sequences, 26701662537 bases, 22547662537 kmers +read 67725914 sequences, 26965776908 bases, 22766770240 kmers +num_kmers 22766770240 +cost: 2.0 + 0.368872 [bits/kmer] +max string length = 77847 +num bits per_absolute_offset = 35 +num bits per_relative_offset = 17 +num bits per_string_id = 27 +=== step 1 (encode strings): 70.9055 [sec] (3.11443 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.10.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.29.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.48.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.67.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.86.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.93.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.105.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.112.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.124.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.131.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.142.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.143.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.144.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.145.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.146.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.147.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.148.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.149.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.150.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.151.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.152.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.153.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.154.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.155.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.156.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.157.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.158.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.159.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.160.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.161.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.162.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.163.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.164.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.165.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.166.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.167.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.168.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.169.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.170.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.171.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.172.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.173.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.174.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.175.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.176.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.177.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.178.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.179.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.180.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.181.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.182.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.183.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.184.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.185.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.186.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.187.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.188.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.189.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.190.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.191.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.192.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.193.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.194.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.195.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.196.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.197.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.198.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.199.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.200.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.201.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.202.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.203.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.204.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.205.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.206.bin'... +=== step 2 (compute minimizer tuples): 8.81543 [sec] (0.387206 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +=== step 3 (merging minimizer tuples): 82.7074 [sec] (3.63281 [ns/kmer]) +num_minimizers = 885645159 +num_minimizer_positions = 1200568435 +num_super_kmers = 1200568435 +building minimizers MPHF with 64 threads and 296 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 32.5256 [sec] (1.42864 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.2.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 37.3106 [sec] (1.63882 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762818856201662731.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +processed 800000000 minimizer tuples +processed 900000000 minimizer tuples +processed 1000000000 minimizer tuples +processed 1100000000 minimizer tuples +processed 1200000000 minimizer tuples +=== step 6 (merging minimizers tuples): 169.296 [sec] (7.43611 [ns/kmer]) +num_bits_per_offset = 35 +num_buckets_larger_than_1_not_in_skew_index 69198834/885645159 (7.81338%) +num_buckets_in_skew_index 487719/885645159 (0.0550693%) +max_bucket_size 94315 +log2_max_bucket_size 17 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 286596361/1200568435 (23.8717%) +num_minimizer_positions_of_buckets_in_skew_index 98013468/1200568435 (8.16392%) +=== step 7.1 (build sparse index): 21.3249 [sec] (0.936669 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 425537767 + partition = 1: num kmers in buckets of size > 128 and <= 256: 362066886 + partition = 2: num kmers in buckets of size > 256 and <= 512: 301939123 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 228603652 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 148530360 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 81213417 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 38708611 + partition = 7: num kmers in buckets of size > 8192 and <= 94315: 36554519 +num kmers in skew index = 1623154335 (7.12949%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 425537767 + building MPHF with 64 threads and 142 partitions (avg. 
partition size = 3000000)... + built mphs[0] for 425537767 kmers; bits/key = 2.55898 + built positions[0] for 425537767 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 362066886 + building MPHF with 64 threads and 121 partitions (avg. partition size = 3000000)... + built mphs[1] for 362066886 kmers; bits/key = 2.55692 + built positions[1] for 362066886 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 301939123 + building MPHF with 64 threads and 101 partitions (avg. partition size = 3000000)... + built mphs[2] for 301939123 kmers; bits/key = 2.55728 + built positions[2] for 301939123 kmers; bits/key = 9 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 228603652 + building MPHF with 64 threads and 77 partitions (avg. partition size = 3000000)... + built mphs[3] for 228603652 kmers; bits/key = 2.56716 + built positions[3] for 228603652 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 148530360 + building MPHF with 64 threads and 50 partitions (avg. partition size = 3000000)... + built mphs[4] for 148530360 kmers; bits/key = 2.56647 + built positions[4] for 148530360 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 81213417 + building MPHF with 64 threads and 28 partitions (avg. partition size = 3000000)... + built mphs[5] for 81213417 kmers; bits/key = 2.53824 + built positions[5] for 81213417 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 38708611 + building MPHF with 64 threads and 13 partitions (avg. partition size = 3000000)... + built mphs[6] for 38708611 kmers; bits/key = 2.54261 + built positions[6] for 38708611 kmers; bits/key = 13 + lower = 8192; upper = 94315; num_bits_per_pos = 17; num_kmers_in_partition = 36554519 + building MPHF with 64 threads and 13 partitions (avg. 
partition size = 3000000)... + built mphs[7] for 36554519 kmers; bits/key = 2.55068 + built positions[7] for 36554519 kmers; bits/key = 17 +=== step 7.2 (build skew index): 242.98 [sec] (10.6726 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 92.1315% +buckets with 2 minimizer positions = 4.3466% +buckets with 3 minimizer positions = 1.2957% +buckets with 4 minimizer positions = 0.623479% +buckets with 5 minimizer positions = 0.366055% +buckets with 6 minimizer positions = 0.23929% +buckets with 7 minimizer positions = 0.167326% +buckets with 8 minimizer positions = 0.12317% +buckets with 9 minimizer positions = 0.0938571% +buckets with 10 minimizer positions = 0.0737212% +buckets with 11 minimizer positions = 0.0591028% +buckets with 12 minimizer positions = 0.0484583% +buckets with 13 minimizer positions = 0.0404011% +buckets with 14 minimizer positions = 0.0339243% +buckets with 15 minimizer positions = 0.0291173% +buckets with 16 minimizer positions = 0.0249361% +max_bucket_size = 94315 +=== step 7 (build sparse and skew index): 267.119 [sec] (11.7329 [ns/kmer]) +=== total time: 668.68 [sec] (29.3709 [ns/kmer]) +total index size: 15531396563 [B] -- 15531.4 [MB] +SPACE BREAKDOWN: + mphf: 0.110167 [bits/kmer] (2.832 [bits/key]) -- 2.01861% + strings_offsets: 0.162621 [bits/kmer] -- 2.97973% + control_codewords: 1.40043 [bits/kmer] -- 25.6603% + mid_load_buckets: 0.440593 [bits/kmer] -- 8.07306% + begin_buckets_of_size: 9.41723e-08 [bits/kmer] -- 1.72554e-06% + strings: 2.36887 [bits/kmer] -- 43.4053% + skew_index: 0.974887 [bits/kmer] -- 17.863% + weights: 6.46556e-08 [bits/kmer] -- 1.1847e-06% + -------------- + total: 5.45757 [bits/kmer] +2025-11-11 01:05:24: saving data structure to disk... 
+2025-11-11 01:06:37: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash +2025-11-11 01:06:38: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... +read 1000000 sequences, 123618042 bases, 61618042 kmers +read 2000000 sequences, 248006699 bases, 124006699 kmers +read 3000000 sequences, 373165849 bases, 187165849 kmers +read 4000000 sequences, 499216806 bases, 251216806 kmers +read 5000000 sequences, 626504126 bases, 316504126 kmers +read 6000000 sequences, 754844978 bases, 382844978 kmers +read 7000000 sequences, 884611558 bases, 450611558 kmers +read 8000000 sequences, 1015719906 bases, 519719906 kmers +read 9000000 sequences, 1148384128 bases, 590384128 kmers +read 10000000 sequences, 1283305186 bases, 663305186 kmers +read 11000000 sequences, 1420392618 bases, 738392618 kmers +read 12000000 sequences, 1559905103 bases, 815905103 kmers +read 13000000 sequences, 1702344045 bases, 896344045 kmers +read 14000000 sequences, 1848153905 bases, 980153905 kmers +read 15000000 sequences, 1998487611 bases, 1068487611 kmers +read 16000000 sequences, 2153589528 bases, 1161589528 kmers +read 17000000 sequences, 2314472162 bases, 1260472162 kmers +read 18000000 sequences, 2483331066 bases, 1367331066 kmers +read 19000000 sequences, 2661730312 bases, 1483730312 kmers +read 20000000 sequences, 2852409810 bases, 1612409810 kmers +read 21000000 sequences, 3060194564 bases, 1758194564 kmers +read 22000000 sequences, 3290140238 bases, 1926140238 kmers +read 23000000 sequences, 3552570970 bases, 2126570970 kmers +read 24000000 sequences, 3863888905 bases, 2375888905 kmers +read 25000000 sequences, 4253358029 bases, 2703358029 kmers +read 26000000 sequences, 4780487647 bases, 3168487647 kmers +read 27000000 sequences, 5604484526 bases, 3930484526 kmers +read 28000000 sequences, 5925952935 
bases, 4189952935 kmers +read 29000000 sequences, 6039783917 bases, 4241783917 kmers +read 30000000 sequences, 6153634902 bases, 4293634902 kmers +read 31000000 sequences, 6267684053 bases, 4345684053 kmers +read 32000000 sequences, 6381788267 bases, 4397788267 kmers +read 33000000 sequences, 6496092541 bases, 4450092541 kmers +read 34000000 sequences, 6610456809 bases, 4502456809 kmers +read 35000000 sequences, 6725025608 bases, 4555025608 kmers +read 36000000 sequences, 6839697388 bases, 4607697388 kmers +read 37000000 sequences, 6954566139 bases, 4660566139 kmers +read 38000000 sequences, 7069620814 bases, 4713620814 kmers +read 39000000 sequences, 7184856392 bases, 4766856392 kmers +read 40000000 sequences, 7300352498 bases, 4820352498 kmers +read 41000000 sequences, 7415987203 bases, 4873987203 kmers +read 42000000 sequences, 7531875755 bases, 4927875755 kmers +read 43000000 sequences, 7647987237 bases, 4981987237 kmers +read 44000000 sequences, 7764325565 bases, 5036325565 kmers +read 45000000 sequences, 7880919196 bases, 5090919196 kmers +read 46000000 sequences, 7997748943 bases, 5145748943 kmers +read 47000000 sequences, 8114852221 bases, 5200852221 kmers +read 48000000 sequences, 8232292777 bases, 5256292777 kmers +read 49000000 sequences, 8349993383 bases, 5311993383 kmers +read 50000000 sequences, 8468086161 bases, 5368086161 kmers +read 51000000 sequences, 8586456588 bases, 5424456588 kmers +read 52000000 sequences, 8705279881 bases, 5481279881 kmers +read 53000000 sequences, 8824571697 bases, 5538571697 kmers +read 54000000 sequences, 8944259928 bases, 5596259928 kmers +read 55000000 sequences, 9064361649 bases, 5654361649 kmers +read 56000000 sequences, 9185024212 bases, 5713024212 kmers +read 57000000 sequences, 9306137968 bases, 5772137968 kmers +read 58000000 sequences, 9427875971 bases, 5831875971 kmers +read 59000000 sequences, 9550182119 bases, 5892182119 kmers +read 59568965 sequences, 9620061299 bases, 5926785469 kmers +num_kmers 5926785469 
+cost: 2.0 + 1.2463 [bits/kmer] +max string length = 27681 +num bits per_absolute_offset = 34 +num bits per_relative_offset = 15 +num bits per_string_id = 26 +=== step 1 (encode strings): 27.8179 [sec] (4.69359 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.16.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.35.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.54.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.73.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.74.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.82.bin'... +=== step 2 (compute minimizer tuples): 6.62651 [sec] (1.11806 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +=== step 3 (merging minimizer tuples): 22.5011 [sec] (3.79651 [ns/kmer]) +num_minimizers = 274429863 +num_minimizer_positions = 406062800 +num_super_kmers = 406062800 +building minimizers MPHF with 64 threads and 92 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 11.0494 [sec] (1.86432 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819598072253670.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 13.5542 [sec] (2.28693 [ns/kmer]) +=== step 6 (merging minimizers tuples): 49.4978 [sec] (8.35154 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 51299487/274429863 (18.6931%) +num_buckets_in_skew_index 131946/274429863 (0.04808%) +max_bucket_size 176220 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 146053805/406062800 (35.9683%) +num_minimizer_positions_of_buckets_in_skew_index 37010565/406062800 (9.11449%) +=== step 7.1 (build sparse index): 9.0324 [sec] (1.524 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 83316513 + partition = 1: num kmers in buckets of size > 128 and <= 256: 74231522 + partition = 2: num kmers in buckets of size > 256 and <= 512: 67271777 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 57125353 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 46013184 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 37921473 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 28349062 + partition = 7: num kmers in buckets of size > 8192 and <= 176220: 62824185 +num kmers in skew index = 457053069 (7.71165%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 83316513 + building MPHF with 64 threads and 28 partitions (avg. partition size = 3000000)... + built mphs[0] for 83316513 kmers; bits/key = 2.55669 + built positions[0] for 83316513 kmers; bits/key = 7 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 74231522 + building MPHF with 64 threads and 25 partitions (avg. partition size = 3000000)... + built mphs[1] for 74231522 kmers; bits/key = 2.57033 + built positions[1] for 74231522 kmers; bits/key = 8 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 67271777 + building MPHF with 64 threads and 23 partitions (avg. partition size = 3000000)... 
+ built mphs[2] for 67271777 kmers; bits/key = 2.54468 + built positions[2] for 67271777 kmers; bits/key = 9.00001 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 57125353 + building MPHF with 64 threads and 20 partitions (avg. partition size = 3000000)... + built mphs[3] for 57125353 kmers; bits/key = 2.53252 + built positions[3] for 57125353 kmers; bits/key = 10 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 46013184 + building MPHF with 64 threads and 16 partitions (avg. partition size = 3000000)... + built mphs[4] for 46013184 kmers; bits/key = 2.53117 + built positions[4] for 46013184 kmers; bits/key = 11 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 37921473 + building MPHF with 64 threads and 13 partitions (avg. partition size = 3000000)... + built mphs[5] for 37921473 kmers; bits/key = 2.56414 + built positions[5] for 37921473 kmers; bits/key = 12 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 28349062 + building MPHF with 64 threads and 10 partitions (avg. partition size = 3000000)... + built mphs[6] for 28349062 kmers; bits/key = 2.56358 + built positions[6] for 28349062 kmers; bits/key = 13 + lower = 8192; upper = 176220; num_bits_per_pos = 18; num_kmers_in_partition = 62824185 + building MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 62824185 kmers; bits/key = 2.55204 + built positions[7] for 62824185 kmers; bits/key = 18 +=== step 7.2 (build skew index): 59.2702 [sec] (10.0004 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 81.2588% +buckets with 2 minimizer positions = 13.7158% +buckets with 3 minimizer positions = 2.82467% +buckets with 4 minimizer positions = 0.819903% +buckets with 5 minimizer positions = 0.371916% +buckets with 6 minimizer positions = 0.218785% +buckets with 7 minimizer positions = 0.143859% +buckets with 8 minimizer positions = 0.101665% +buckets with 9 minimizer positions = 0.0751737% +buckets with 10 minimizer positions = 0.0573965% +buckets with 11 minimizer positions = 0.0451926% +buckets with 12 minimizer positions = 0.0368258% +buckets with 13 minimizer positions = 0.0299698% +buckets with 14 minimizer positions = 0.025193% +buckets with 15 minimizer positions = 0.0214499% +buckets with 16 minimizer positions = 0.0183803% +max_bucket_size = 176220 +=== step 7 (build sparse and skew index): 69.3598 [sec] (11.7028 [ns/kmer]) +=== total time: 200.407 [sec] (33.8137 [ns/kmer]) +total index size: 5431161974 [B] -- 5431.16 [MB] +SPACE BREAKDOWN: + mphf: 0.131165 [bits/kmer] (2.83273 [bits/key]) -- 1.78918% + strings_offsets: 0.273643 [bits/kmer] -- 3.73268% + control_codewords: 1.62062 [bits/kmer] -- 22.1063% + mid_load_buckets: 0.837862 [bits/kmer] -- 11.429% + begin_buckets_of_size: 3.61748e-07 [bits/kmer] -- 4.93449e-06% + strings: 3.2463 [bits/kmer] -- 44.2818% + skew_index: 1.22142 [bits/kmer] -- 16.661% + weights: 2.48364e-07 [bits/kmer] -- 3.38786e-06% + -------------- + total: 7.33101 [bits/kmer] +2025-11-11 01:09:58: saving data structure to disk... 
+2025-11-11 01:10:23: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash +2025-11-11 01:10:24: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz'... +read 1000000 sequences, 146366718 bases, 84366718 kmers +read 2000000 sequences, 296791327 bases, 172791327 kmers +read 3000000 sequences, 452465563 bases, 266465563 kmers +read 4000000 sequences, 614198433 bases, 366198433 kmers +read 5000000 sequences, 783034539 bases, 473034539 kmers +read 6000000 sequences, 966455345 bases, 594455345 kmers +read 7000000 sequences, 1177950483 bases, 743950483 kmers +read 8000000 sequences, 1408450282 bases, 912450282 kmers +read 9000000 sequences, 1536152549 bases, 978152549 kmers +read 10000000 sequences, 1664904910 bases, 1044904910 kmers +read 11000000 sequences, 1794061925 bases, 1112061925 kmers +read 12000000 sequences, 1923575230 bases, 1179575230 kmers +read 13000000 sequences, 2053070210 bases, 1247070210 kmers +read 14000000 sequences, 2183943636 bases, 1315943636 kmers +read 15000000 sequences, 2316373175 bases, 1386373175 kmers +read 16000000 sequences, 2449041041 bases, 1457041041 kmers +read 17000000 sequences, 2582562981 bases, 1528562981 kmers +read 18000000 sequences, 2717390529 bases, 1601390529 kmers +read 19000000 sequences, 2853100531 bases, 1675100531 kmers +read 20000000 sequences, 2990606476 bases, 1750606476 kmers +read 21000000 sequences, 3129300415 bases, 1827300415 kmers +read 22000000 sequences, 3269949888 bases, 1905949888 kmers +read 23000000 sequences, 3412658016 bases, 1986658016 kmers +read 23474327 sequences, 3483064285 bases, 2027656011 kmers +num_kmers 2027656011 +cost: 2.0 + 1.43556 [bits/kmer] +max string length = 643923 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 20 +num bits per_string_id = 25 +=== step 1 (encode strings): 10.3277 
[sec] (5.09343 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.18.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.37.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.56.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.979513 [sec] (0.483077 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 7.74007 [sec] (3.81725 [ns/kmer]) +num_minimizers = 76630489 +num_minimizer_positions = 141268659 +num_super_kmers = 141268659 +building minimizers MPHF with 64 threads and 26 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 3.33414 [sec] (1.64433 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819824382133876.minimizers.0.bin'... 
+=== step 5 (replacing minimizer values with MPHF hashes): 4.4719 [sec] (2.20545 [ns/kmer]) +=== step 6 (merging minimizers tuples): 10.5513 [sec] (5.20368 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 12031601/76630489 (15.7008%) +num_buckets_in_skew_index 62262/76630489 (0.0812496%) +max_bucket_size 281880 +log2_max_bucket_size 19 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 66085279/141268659 (46.7799%) +num_minimizer_positions_of_buckets_in_skew_index 10646754/141268659 (7.53653%) +=== step 7.1 (build sparse index): 3.21849 [sec] (1.5873 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 65292887 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16768176 + partition = 2: num kmers in buckets of size > 256 and <= 512: 6866326 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 4652668 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 4307414 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 3858894 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 5911410 + partition = 7: num kmers in buckets of size > 8192 and <= 281880: 31685511 +num kmers in skew index = 139343286 (6.87214%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 65292887 + building MPHF with 64 threads and 22 partitions (avg. partition size = 3000000)... + built mphs[0] for 65292887 kmers; bits/key = 2.55008 + built positions[0] for 65292887 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16768176 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16768176 kmers; bits/key = 2.56383 + built positions[1] for 16768176 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 6866326 + building MPHF with 64 threads and 3 partitions (avg. 
partition size = 3000000)... + built mphs[2] for 6866326 kmers; bits/key = 2.72645 + built positions[2] for 6866326 kmers; bits/key = 9.00006 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 4652668 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 4652668 kmers; bits/key = 2.812 + built positions[3] for 4652668 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4307414 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[4] for 4307414 kmers; bits/key = 2.80506 + built positions[4] for 4307414 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3858894 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[5] for 3858894 kmers; bits/key = 3.08272 + built positions[5] for 3858894 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 5911410 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[6] for 5911410 kmers; bits/key = 2.51936 + built positions[6] for 5911410 kmers; bits/key = 13.0001 + lower = 8192; upper = 281880; num_bits_per_pos = 19; num_kmers_in_partition = 31685511 + building MPHF with 64 threads and 11 partitions (avg. partition size = 3000000)... 
+ built mphs[7] for 31685511 kmers; bits/key = 2.52689 + built positions[7] for 31685511 kmers; bits/key = 19 +=== step 7.2 (build skew index): 19.6513 [sec] (9.69165 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 84.2179% +buckets with 2 minimizer positions = 7.76108% +buckets with 3 minimizer positions = 2.50949% +buckets with 4 minimizer positions = 1.23307% +buckets with 5 minimizer positions = 0.750442% +buckets with 6 minimizer positions = 0.516927% +buckets with 7 minimizer positions = 0.38172% +buckets with 8 minimizer positions = 0.296813% +buckets with 9 minimizer positions = 0.239214% +buckets with 10 minimizer positions = 0.197673% +buckets with 11 minimizer positions = 0.166211% +buckets with 12 minimizer positions = 0.142668% +buckets with 13 minimizer positions = 0.124415% +buckets with 14 minimizer positions = 0.108818% +buckets with 15 minimizer positions = 0.0962646% +buckets with 16 minimizer positions = 0.0863325% +max_bucket_size = 281880 +=== step 7 (build sparse and skew index): 23.2447 [sec] (11.4638 [ns/kmer]) +=== total time: 60.6493 [sec] (29.911 [ns/kmer]) +total index size: 1824281560 [B] -- 1824.28 [MB] +SPACE BREAKDOWN: + mphf: 0.108372 [bits/kmer] (2.86754 [bits/key]) -- 1.50567% + strings_offsets: 0.291878 [bits/kmer] -- 4.05522% + control_codewords: 1.24716 [bits/kmer] -- 17.3274% + mid_load_buckets: 1.04294 [bits/kmer] -- 14.4902% + begin_buckets_of_size: 1.05738e-06 [bits/kmer] -- 1.46907e-05% + strings: 3.43556 [bits/kmer] -- 47.732% + skew_index: 1.07169 [bits/kmer] -- 14.8895% + weights: 7.25961e-07 [bits/kmer] -- 1.00862e-05% + -------------- + total: 7.1976 [bits/kmer] +2025-11-11 01:11:25: saving data structure to disk... +2025-11-11 01:11:33: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash +2025-11-11 01:11:33: building data structure... 
+reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +read 1000000 sequences, 195596653 bases, 133596653 kmers +read 2000000 sequences, 328155468 bases, 204155468 kmers +read 3000000 sequences, 461539333 bases, 275539333 kmers +read 4000000 sequences, 595230806 bases, 347230806 kmers +read 5000000 sequences, 730213776 bases, 420213776 kmers +read 6000000 sequences, 866286185 bases, 494286185 kmers +read 7000000 sequences, 1002901529 bases, 568901529 kmers +read 8000000 sequences, 1142394085 bases, 646394085 kmers +read 9000000 sequences, 1282878983 bases, 724878983 kmers +read 10000000 sequences, 1425636398 bases, 805636398 kmers +read 11000000 sequences, 1571133044 bases, 889133044 kmers +read 12000000 sequences, 1720028668 bases, 976028668 kmers +read 13000000 sequences, 1874571339 bases, 1068571339 kmers +read 14000000 sequences, 2036208720 bases, 1168208720 kmers +read 15000000 sequences, 2208992707 bases, 1278992707 kmers +read 16000000 sequences, 2403498478 bases, 1411498478 kmers +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +cost: 2.0 + 1.35283 [bits/kmer] +max string length = 499189 +num bits per_absolute_offset = 32 +num bits per_relative_offset = 19 +num bits per_string_id = 24 +=== step 1 (encode strings): 7.85856 [sec] (5.15348 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.5.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.24.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.43.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.62.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.764585 [sec] (0.501399 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.bin' +processed 100000000 minimizer tuples +=== step 3 (merging minimizer tuples): 5.74818 [sec] (3.76954 [ns/kmer]) +num_minimizers = 61951224 +num_minimizer_positions = 105337248 +num_super_kmers = 105337248 +building minimizers MPHF with 64 threads and 21 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 2.75372 [sec] (1.80583 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819893593617488.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 3.39227 [sec] (2.22458 [ns/kmer]) +=== step 6 (merging minimizers tuples): 7.61206 [sec] (4.99183 [ns/kmer]) +num_bits_per_offset = 32 +num_buckets_larger_than_1_not_in_skew_index 9999764/61951224 (16.1414%) +num_buckets_in_skew_index 36489/61951224 (0.0588996%) +max_bucket_size 144478 +log2_max_bucket_size 18 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 47622255/105337248 (45.2093%) +num_minimizer_positions_of_buckets_in_skew_index 5800022/105337248 (5.50615%) +=== step 7.1 (build sparse index): 2.3957 [sec] (1.57105 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 32078766 + partition = 1: num kmers in buckets of size > 128 and <= 256: 16092632 + partition = 2: num kmers in buckets of size > 256 and <= 512: 8174536 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 3454318 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 2781070 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 2981930 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 2815305 + partition = 7: num kmers in buckets of size > 8192 and <= 144478: 7418130 +num kmers in skew index = 75796687 
(4.97059%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 32078766 + building MPHF with 64 threads and 11 partitions (avg. partition size = 3000000)... + built mphs[0] for 32078766 kmers; bits/key = 2.51437 + built positions[0] for 32078766 kmers; bits/key = 7.00001 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 16092632 + building MPHF with 64 threads and 6 partitions (avg. partition size = 3000000)... + built mphs[1] for 16092632 kmers; bits/key = 2.65398 + built positions[1] for 16092632 kmers; bits/key = 8.00002 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 8174536 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[2] for 8174536 kmers; bits/key = 2.61889 + built positions[2] for 8174536 kmers; bits/key = 9.00005 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3454318 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[3] for 3454318 kmers; bits/key = 3.14686 + built positions[3] for 3454318 kmers; bits/key = 10.0001 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 2781070 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2781070)... + built mphs[4] for 2781070 kmers; bits/key = 2.55987 + built positions[4] for 2781070 kmers; bits/key = 11.0001 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 2981930 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2981930)... + built mphs[5] for 2981930 kmers; bits/key = 2.55978 + built positions[5] for 2981930 kmers; bits/key = 12.0001 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 2815305 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2815305)... 
+ built mphs[6] for 2815305 kmers; bits/key = 2.55984 + built positions[6] for 2815305 kmers; bits/key = 13.0001 + lower = 8192; upper = 144478; num_bits_per_pos = 18; num_kmers_in_partition = 7418130 + building MPHF with 64 threads and 3 partitions (avg. partition size = 3000000)... + built mphs[7] for 7418130 kmers; bits/key = 2.72795 + built positions[7] for 7418130 kmers; bits/key = 18.0001 +=== step 7.2 (build skew index): 13.4056 [sec] (8.79113 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 83.7998% +buckets with 2 minimizer positions = 8.4968% +buckets with 3 minimizer positions = 2.71676% +buckets with 4 minimizer positions = 1.2809% +buckets with 5 minimizer positions = 0.745569% +buckets with 6 minimizer positions = 0.487324% +buckets with 7 minimizer positions = 0.345178% +buckets with 8 minimizer positions = 0.259136% +buckets with 9 minimizer positions = 0.202319% +buckets with 10 minimizer positions = 0.163595% +buckets with 11 minimizer positions = 0.136582% +buckets with 12 minimizer positions = 0.116401% +buckets with 13 minimizer positions = 0.100962% +buckets with 14 minimizer positions = 0.0888166% +buckets with 15 minimizer positions = 0.0782793% +buckets with 16 minimizer positions = 0.0706992% +max_bucket_size = 144478 +=== step 7 (build sparse and skew index): 16.0764 [sec] (10.5425 [ns/kmer]) +=== total time: 44.2057 [sec] (28.9892 [ns/kmer]) +total index size: 1294767218 [B] -- 1294.77 [MB] +SPACE BREAKDOWN: + mphf: 0.116088 [bits/kmer] (2.85745 [bits/key]) -- 1.70902% + strings_offsets: 0.274587 [bits/kmer] -- 4.04242% + control_codewords: 1.34067 [bits/kmer] -- 19.737% + mid_load_buckets: 0.99935 [bits/kmer] -- 14.7122% + begin_buckets_of_size: 1.40599e-06 [bits/kmer] -- 2.06987e-05% + strings: 3.35283 [bits/kmer] -- 49.3596% + skew_index: 0.709128 [bits/kmer] -- 10.4396% + weights: 9.65307e-07 [bits/kmer] -- 1.4211e-05% + -------------- + total: 6.79265 [bits/kmer] +2025-11-11 01:12:17: saving data 
structure to disk... +2025-11-11 01:12:23: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash +2025-11-11 01:12:23: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +cost: 2.0 + 0.240545 [bits/kmer] +max string length = 490374 +num bits per_absolute_offset = 29 +num bits per_relative_offset = 19 +num bits per_string_id = 20 +=== step 1 (encode strings): 1.17621 [sec] (2.85131 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.11.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.17.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.30.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.36.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.49.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.55.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.63.bin'... +=== step 2 (compute minimizer tuples): 0.321286 [sec] (0.778845 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.bin' +=== step 3 (merging minimizer tuples): 1.06842 [sec] (2.59001 [ns/kmer]) +num_minimizers = 18448739 +num_minimizer_positions = 20311554 +num_super_kmers = 20311554 +building minimizers MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... 
+=== step 4 (build mphf): 1.06742 [sec] (2.58758 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819943605340003.minimizers.0.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 0.755179 [sec] (1.83067 [ns/kmer]) +=== step 6 (merging minimizers tuples): 1.12785 [sec] (2.73408 [ns/kmer]) +num_bits_per_offset = 29 +num_buckets_larger_than_1_not_in_skew_index 1211725/18448739 (6.56806%) +num_buckets_in_skew_index 16/18448739 (8.67268e-05%) +max_bucket_size 489 +log2_max_bucket_size 9 +num_partitions in skew index 3 +num_minimizer_positions_of_buckets_larger_than_1 3072206/20311554 (15.1254%) +num_minimizer_positions_of_buckets_in_skew_index 2350/20311554 (0.0115698%) +=== step 7.1 (build sparse index): 0.273848 [sec] (0.663848 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 15469 + partition = 1: num kmers in buckets of size > 128 and <= 256: 14628 + partition = 2: num kmers in buckets of size > 256 and <= 489: 2787 +num kmers in skew index = 32884 (0.00797157%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 15469 + building MPHF with 64 threads and 1 partitions (avg. partition size = 15469)... + built mphs[0] for 15469 kmers; bits/key = 2.55168 + built positions[0] for 15469 kmers; bits/key = 7.02101 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 14628 + building MPHF with 64 threads and 1 partitions (avg. partition size = 14628)... + built mphs[1] for 14628 kmers; bits/key = 2.56166 + built positions[1] for 14628 kmers; bits/key = 8.02406 + lower = 256; upper = 489; num_bits_per_pos = 9; num_kmers_in_partition = 2787 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2787)... 
+ built mphs[2] for 2787 kmers; bits/key = 3.06566 + built positions[2] for 2787 kmers; bits/key = 9.11661 +=== step 7.2 (build skew index): 0.039049 [sec] (0.0946606 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 93.4318% +buckets with 2 minimizer positions = 4.75528% +buckets with 3 minimizer positions = 1.08358% +buckets with 4 minimizer positions = 0.365494% +buckets with 5 minimizer positions = 0.158802% +buckets with 6 minimizer positions = 0.080412% +buckets with 7 minimizer positions = 0.0434393% +buckets with 8 minimizer positions = 0.0264408% +buckets with 9 minimizer positions = 0.0161962% +buckets with 10 minimizer positions = 0.0109113% +buckets with 11 minimizer positions = 0.00805475% +buckets with 12 minimizer positions = 0.00528491% +buckets with 13 minimizer positions = 0.00364795% +buckets with 14 minimizer positions = 0.00271563% +buckets with 15 minimizer positions = 0.00196219% +buckets with 16 minimizer positions = 0.00142015% +max_bucket_size = 489 +=== step 7 (build sparse and skew index): 0.363897 [sec] (0.882141 [ns/kmer]) +=== total time: 5.88026 [sec] (14.2546 [ns/kmer]) +total index size: 209086404 [B] -- 209.086 [MB] +SPACE BREAKDOWN: + mphf: 0.136459 [bits/kmer] (3.05124 [bits/key]) -- 3.36533% + strings_offsets: 0.119202 [bits/kmer] -- 2.93974% + control_codewords: 1.34168 [bits/kmer] -- 33.0881% + mid_load_buckets: 0.215978 [bits/kmer] -- 5.32641% + begin_buckets_of_size: 5.19738e-06 [bits/kmer] -- 0.000128177% + strings: 2.24055 [bits/kmer] -- 55.2559% + skew_index: 0.000982963 [bits/kmer] -- 0.0242417% + weights: 3.56835e-06 [bits/kmer] -- 8.80019e-05% + -------------- + total: 4.05485 [bits/kmer] +2025-11-11 01:12:29: saving data structure to disk... 
+2025-11-11 01:12:30: DONE +./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash +2025-11-11 01:12:30: building data structure... +reading file '/mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz'... +read 1000000 sequences, 508802169 bases, 446802169 kmers +read 2000000 sequences, 1018081275 bases, 894081275 kmers +read 3000000 sequences, 1530401896 bases, 1344401896 kmers +read 4000000 sequences, 2055069016 bases, 1807069016 kmers +read 5000000 sequences, 2593224248 bases, 2283224248 kmers +read 6000000 sequences, 3139549385 bases, 2767549385 kmers +read 7000000 sequences, 3701263851 bases, 3267263851 kmers +read 8000000 sequences, 4283309251 bases, 3787309251 kmers +read 9000000 sequences, 4887570899 bases, 4329570899 kmers +read 10000000 sequences, 5529157849 bases, 4909157849 kmers +read 11000000 sequences, 6209792203 bases, 5527792203 kmers +read 12000000 sequences, 6962139753 bases, 6218139753 kmers +read 13000000 sequences, 7835035276 bases, 7029035276 kmers +read 14000000 sequences, 8978632454 bases, 8110632454 kmers +read 15000000 sequences, 11212240633 bases, 10282240633 kmers +read 16000000 sequences, 11682448788 bases, 10690448788 kmers +read 17000000 sequences, 12155568466 bases, 11101568466 kmers +read 18000000 sequences, 12632855540 bases, 11516855540 kmers +read 19000000 sequences, 13118014015 bases, 11940014015 kmers +read 20000000 sequences, 13600052606 bases, 12360052606 kmers +read 21000000 sequences, 14089576602 bases, 12787576602 kmers +read 22000000 sequences, 14580292633 bases, 13216292633 kmers +read 23000000 sequences, 15076414826 bases, 13650414826 kmers +read 23030730 sequences, 15091515601 bases, 13663610341 kmers +num_kmers 13663610341 +cost: 2.0 + 0.209008 [bits/kmer] +max string length = 980757 +num bits per_absolute_offset = 34 +num bits 
per_relative_offset = 20 +num bits per_string_id = 25 +=== step 1 (encode strings): 38.0016 [sec] (2.78123 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.0.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.1.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.2.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.3.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.4.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.5.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.6.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.7.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.8.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.9.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.10.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.11.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.12.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.13.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.14.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.15.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.16.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.17.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.18.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.19.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.20.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.21.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.22.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.23.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.24.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.25.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.26.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.27.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.28.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.29.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.30.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.31.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.32.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.33.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.34.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.35.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.36.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.37.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.38.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.39.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.40.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.41.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.42.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.43.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.44.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.45.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.46.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.47.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.48.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.49.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.50.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.51.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.52.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.53.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.54.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.55.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.56.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.57.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.58.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.59.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.60.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.61.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.62.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.63.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.64.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.65.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.66.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.67.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.68.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.69.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.70.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.71.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.72.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.73.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.74.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.75.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.76.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.77.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.78.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.79.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.80.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.81.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.82.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.83.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.84.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.85.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.86.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.87.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.88.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.89.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.90.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.91.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.92.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.93.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.94.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.95.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.96.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.97.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.98.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.99.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.100.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.101.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.102.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.103.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.104.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.105.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.106.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.107.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.108.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.109.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.110.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.111.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.112.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.113.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.114.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.115.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.116.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.117.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.118.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.119.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.120.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.121.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.122.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.123.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.124.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.125.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.126.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.127.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.128.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.129.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.130.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.131.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.132.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.133.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.134.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.135.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.136.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.137.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.138.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.139.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.140.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.141.bin'... +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.142.bin'... +=== step 2 (compute minimizer tuples): 7.70188 [sec] (0.563678 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +=== step 3 (merging minimizer tuples): 77.1587 [sec] (5.64702 [ns/kmer]) +num_minimizers = 635376539 +num_minimizer_positions = 704332572 +num_super_kmers = 704332572 +building minimizers MPHF with 64 threads and 212 partitions (avg. partition size = 3000000)... +=== step 4 (build mphf): 23.1077 [sec] (1.69119 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.0.bin'... 
+saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.1.bin'... +=== step 5 (replacing minimizer values with MPHF hashes): 23.8057 [sec] (1.74227 [ns/kmer]) +saving to file '/mnt/hd2/pibiri/DNA/tmp_dir/sshash.tmp.run_1762819950410494738.minimizers.bin' +processed 100000000 minimizer tuples +processed 200000000 minimizer tuples +processed 300000000 minimizer tuples +processed 400000000 minimizer tuples +processed 500000000 minimizer tuples +processed 600000000 minimizer tuples +processed 700000000 minimizer tuples +=== step 6 (merging minimizers tuples): 73.107 [sec] (5.35049 [ns/kmer]) +num_bits_per_offset = 34 +num_buckets_larger_than_1_not_in_skew_index 39285317/635376539 (6.183%) +num_buckets_in_skew_index 18897/635376539 (0.00297414%) +max_bucket_size 54496 +log2_max_bucket_size 16 +num_partitions in skew index 8 +num_minimizer_positions_of_buckets_larger_than_1 105735487/704332572 (15.0122%) +num_minimizer_positions_of_buckets_in_skew_index 2524760/704332572 (0.358461%) +=== step 7.1 (build sparse index): 9.97487 [sec] (0.730032 [ns/kmer]) + partition = 0: num kmers in buckets of size > 64 and <= 128: 20969688 + partition = 1: num kmers in buckets of size > 128 and <= 256: 10737757 + partition = 2: num kmers in buckets of size > 256 and <= 512: 4741531 + partition = 3: num kmers in buckets of size > 512 and <= 1024: 2486138 + partition = 4: num kmers in buckets of size > 1024 and <= 2048: 989727 + partition = 5: num kmers in buckets of size > 2048 and <= 4096: 943348 + partition = 6: num kmers in buckets of size > 4096 and <= 8192: 642370 + partition = 7: num kmers in buckets of size > 8192 and <= 54496: 1294795 +num kmers in skew index = 42805354 (0.31328%) + lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 20969688 + building MPHF with 64 threads and 7 partitions (avg. partition size = 3000000)... 
+ built mphs[0] for 20969688 kmers; bits/key = 2.54235 + built positions[0] for 20969688 kmers; bits/key = 7.00002 + lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 10737757 + building MPHF with 64 threads and 4 partitions (avg. partition size = 3000000)... + built mphs[1] for 10737757 kmers; bits/key = 2.65204 + built positions[1] for 10737757 kmers; bits/key = 8.00003 + lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 4741531 + building MPHF with 64 threads and 2 partitions (avg. partition size = 3000000)... + built mphs[2] for 4741531 kmers; bits/key = 2.76709 + built positions[2] for 4741531 kmers; bits/key = 9.00007 + lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 2486138 + building MPHF with 64 threads and 1 partitions (avg. partition size = 2486138)... + built mphs[3] for 2486138 kmers; bits/key = 2.55994 + built positions[3] for 2486138 kmers; bits/key = 10.0002 + lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 989727 + building MPHF with 64 threads and 1 partitions (avg. partition size = 989727)... + built mphs[4] for 989727 kmers; bits/key = 2.41843 + built positions[4] for 989727 kmers; bits/key = 11.0004 + lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 943348 + building MPHF with 64 threads and 1 partitions (avg. partition size = 943348)... + built mphs[5] for 943348 kmers; bits/key = 2.41845 + built positions[5] for 943348 kmers; bits/key = 12.0004 + lower = 4096; upper = 8192; num_bits_per_pos = 13; num_kmers_in_partition = 642370 + building MPHF with 64 threads and 1 partitions (avg. partition size = 642370)... + built mphs[6] for 642370 kmers; bits/key = 2.41959 + built positions[6] for 642370 kmers; bits/key = 13.0006 + lower = 8192; upper = 54496; num_bits_per_pos = 16; num_kmers_in_partition = 1294795 + building MPHF with 64 threads and 1 partitions (avg. partition size = 1294795)... 
+ built mphs[7] for 1294795 kmers; bits/key = 2.56074 + built positions[7] for 1294795 kmers; bits/key = 16.0003 +=== step 7.2 (build skew index): 8.57148 [sec] (0.627322 [ns/kmer]) +=== bucket statistics (less) === +buckets with 1 minimizer positions = 93.814% +buckets with 2 minimizer positions = 4.42799% +buckets with 3 minimizer positions = 1.00785% +buckets with 4 minimizer positions = 0.351026% +buckets with 5 minimizer positions = 0.151711% +buckets with 6 minimizer positions = 0.0750917% +buckets with 7 minimizer positions = 0.042181% +buckets with 8 minimizer positions = 0.0264498% +buckets with 9 minimizer positions = 0.0181631% +buckets with 10 minimizer positions = 0.0132882% +buckets with 11 minimizer positions = 0.0102552% +buckets with 12 minimizer positions = 0.00803933% +buckets with 13 minimizer positions = 0.00649237% +buckets with 14 minimizer positions = 0.00533274% +buckets with 15 minimizer positions = 0.0044295% +buckets with 16 minimizer positions = 0.00378972% +max_bucket_size = 54496 +=== step 7 (build sparse and skew index): 20.0887 [sec] (1.47023 [ns/kmer]) +=== total time: 262.971 [sec] (19.2461 [ns/kmer]) +total index size: 7528610111 [B] -- 7528.61 [MB] +SPACE BREAKDOWN: + mphf: 0.13127 [bits/kmer] (2.82294 [bits/key]) -- 2.97802% + strings_offsets: 0.136923 [bits/kmer] -- 3.10626% + control_codewords: 1.62755 [bits/kmer] -- 36.9228% + mid_load_buckets: 0.263108 [bits/kmer] -- 5.96891% + begin_buckets_of_size: 1.56913e-07 [bits/kmer] -- 3.55975e-06% + strings: 2.20901 [bits/kmer] -- 50.1139% + skew_index: 0.0401185 [bits/kmer] -- 0.910134% + weights: 1.07731e-07 [bits/kmer] -- 2.44401e-06% + -------------- + total: 4.40798 [bits/kmer] +2025-11-11 01:16:53: saving data structure to disk... 
+2025-11-11 01:17:26: DONE diff --git a/benchmarks/results-10-11-25/k63/regular-build.time.log b/benchmarks/results-10-11-25/k63/regular-build.time.log new file mode 100644 index 0000000..faa2e9b --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-build.time.log @@ -0,0 +1,207 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash" + User time (seconds): 21.00 + System time (seconds): 5.04 + Percent of CPU this job got: 181% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:14.32 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1528612 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1167209 + Voluntary context switches: 30843 + Involuntary context switches: 7048 + Swaps: 0 + File system inputs: 349880 + File system outputs: 4440696 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash" + User time (seconds): 35.71 + System time (seconds): 8.30 + Percent of CPU this job got: 239% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.41 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3226812 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2000154 + Voluntary context switches: 28545 + Involuntary context 
switches: 7499 + Swaps: 0 + File system inputs: 658008 + File system outputs: 8951128 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash" + User time (seconds): 166.38 + System time (seconds): 21.76 + Percent of CPU this job got: 302% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:02.16 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7586552 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6180152 + Voluntary context switches: 30022 + Involuntary context switches: 8614 + Swaps: 0 + File system inputs: 1667336 + File system outputs: 22410192 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/axolotl.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/axolotl.k63.sshash" + User time (seconds): 2164.86 + System time (seconds): 227.97 + Percent of CPU this job got: 322% + Elapsed (wall clock) time (h:mm:ss or m:ss): 12:21.86 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 55227232 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 49182837 + Voluntary context switches: 419242 + Involuntary context switches: 33889 + Swaps: 0 + File system inputs: 16364008 + File system 
outputs: 230502424 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash" + User time (seconds): 569.37 + System time (seconds): 63.67 + Percent of CPU this job got: 279% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:46.30 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 17447736 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 18463727 + Voluntary context switches: 57407 + Involuntary context switches: 13706 + Swaps: 0 + File system inputs: 5974376 + File system outputs: 63147912 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ec.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ec.k63.sshash" + User time (seconds): 160.99 + System time (seconds): 21.39 + Percent of CPU this job got: 263% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:09.20 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 9279992 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6507955 + Voluntary context switches: 30742 + Involuntary context switches: 8708 + Swaps: 0 + File system inputs: 2288816 + File system outputs: 21177984 + Socket messages sent: 0 + Socket messages received: 0 + Signals 
delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash" + User time (seconds): 97.80 + System time (seconds): 15.16 + Percent of CPU this job got: 225% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:50.00 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 5782684 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 4499938 + Voluntary context switches: 29589 + Involuntary context switches: 8741 + Swaps: 0 + File system inputs: 1680088 + File system outputs: 15833744 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash" + User time (seconds): 11.40 + System time (seconds): 3.48 + Percent of CPU this job got: 218% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:06.80 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1134996 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 719329 + Voluntary context switches: 29642 + Involuntary context switches: 6076 + Swaps: 0 + File system inputs: 274408 + File system outputs: 3211544 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: 
"./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/jgi_fungi.batch-0.k63.eulertigs.fa.gz -k 63 -m 25 -g 16 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-indexes/jgi_fungi.batch-0.k63.sshash" + User time (seconds): 610.72 + System time (seconds): 99.38 + Percent of CPU this job got: 239% + Elapsed (wall clock) time (h:mm:ss or m:ss): 4:56.78 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 26375148 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 21111660 + Voluntary context switches: 98691 + Involuntary context switches: 17851 + Swaps: 0 + File system inputs: 8976432 + File system outputs: 136229984 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.json b/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.json new file mode 100644 index 0000000..e173a34 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.json @@ -0,0 +1,6 @@ +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz", "num_kmers": "97972416", "num_positive_kmers": "67275966", "num_negative_kmers": "30696450", "num_invalid_kmers": "0", "num_searches": "34801398", "num_extensions": "32474568", "elapsed_millisec": "7710"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz", "num_kmers": "461383839", "num_positive_kmers": "293470517", "num_negative_kmers": "167902332", "num_invalid_kmers": "10990", "num_searches": "149919071", "num_extensions": "143551446", "elapsed_millisec": "39619"} 
+{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "406529529", "num_negative_kmers": "70615167", "num_invalid_kmers": "673778", "num_searches": "219935013", "num_extensions": "186594516", "elapsed_millisec": "89746"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz", "num_kmers": "10330949", "num_positive_kmers": "10230224", "num_negative_kmers": "99451", "num_invalid_kmers": "1274", "num_searches": "5430984", "num_extensions": "4799240", "elapsed_millisec": "961"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz", "num_kmers": "541466405", "num_positive_kmers": "507202856", "num_negative_kmers": "34238416", "num_invalid_kmers": "25133", "num_searches": "319174875", "num_extensions": "188027981", "elapsed_millisec": "342924"} +{"index_filename": "/mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash", "query_filename": "/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz", "num_kmers": "477818474", "num_positive_kmers": "434532302", "num_negative_kmers": "42612394", "num_invalid_kmers": "673778", "num_searches": "238897243", "num_extensions": "195635059", "elapsed_millisec": "113149"} diff --git a/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.log b/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.log new file mode 100644 index 0000000..2ce58c3 --- /dev/null +++ b/benchmarks/results-10-11-25/k63/regular-streaming-queries-high-hit.log @@ -0,0 +1,66 @@ +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/cod.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz +2025-11-11 21:09:53: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... 
+2025-11-11 21:10:00: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 67275966 (68.6683%) +num_negative_kmers = 30696450 (31.3317%) +num_invalid_kmers = 0 (0%) +num_searches = 34801398/67275966 (51.7293%) +num_extensions = 32474568/67275966 (48.2707%) +elapsed = 7.71 sec / 0.1285 min / 78.6956 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/kestrel.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz +2025-11-11 21:10:01: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-11 21:10:40: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 293470517 (63.6066%) +num_negative_kmers = 167902332 (36.391%) +num_invalid_kmers = 10990 (0.00238196%) +num_searches = 149919071/293470517 (51.0849%) +num_extensions = 143551446/293470517 (48.9151%) +elapsed = 39.619 sec / 0.660317 min / 85.8699 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/human.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:10:41: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-11 21:12:11: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 406529529 (85.0803%) +num_negative_kmers = 70615167 (14.7787%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 219935013/406529529 (54.1006%) +num_extensions = 186594516/406529529 (45.8994%) +elapsed = 89.746 sec / 1.49577 min / 187.824 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/ncbi-virus.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz +2025-11-11 21:12:11: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... 
+2025-11-11 21:12:12: DONE +==== query report: +num_kmers = 10330949 +num_positive_kmers = 10230224 (99.025%) +num_negative_kmers = 99451 (0.962651%) +num_invalid_kmers = 1274 (0.0123319%) +num_searches = 5430984/10230224 (53.0876%) +num_extensions = 4799240/10230224 (46.9124%) +elapsed = 0.961 sec / 0.0160167 min / 93.0215 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/se.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz +2025-11-11 21:12:12: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-11 21:17:55: DONE +==== query report: +num_kmers = 541466405 +num_positive_kmers = 507202856 (93.6721%) +num_negative_kmers = 34238416 (6.32328%) +num_invalid_kmers = 25133 (0.00464165%) +num_searches = 319174875/507202856 (62.9284%) +num_extensions = 188027981/507202856 (37.0716%) +elapsed = 342.924 sec / 5.7154 min / 633.325 ns/kmer +./sshash query -i /mnt/hd2/pibiri/DNA/sshash-indexes/hprc.k63.sshash -q /mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz +2025-11-11 21:17:57: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-11 21:19:51: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 434532302 (90.9409%) +num_negative_kmers = 42612394 (8.91811%) +num_invalid_kmers = 673778 (0.141011%) +num_searches = 238897243/434532302 (54.978%) +num_extensions = 195635059/434532302 (45.022%) +elapsed = 113.149 sec / 1.88582 min / 236.803 ns/kmer diff --git a/benchmarks/results-10-11-25/results.png b/benchmarks/results-10-11-25/results.png new file mode 100644 index 0000000..8bf8e47 Binary files /dev/null and b/benchmarks/results-10-11-25/results.png differ diff --git a/benchmarks/results-22-08-25/k31/22-08-25.canon.bench_log b/benchmarks/results-22-08-25/k31/22-08-25.canon.bench_log deleted file mode 100644 index 43d8ede..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.canon.bench_log +++ /dev/null @@ -1,18 +0,0 @@ -avg_nanosec_per_positive_lookup 851.547 -avg_nanosec_per_negative_lookup 546.268 -avg_nanosec_per_positive_lookup_advanced 832.321 -avg_nanosec_per_negative_lookup_advanced 546.19 -avg_nanosec_per_access 438.159 -iterator: avg_nanosec_per_kmer 19.9811 -avg_nanosec_per_positive_lookup 825.457 -avg_nanosec_per_negative_lookup 618.156 -avg_nanosec_per_positive_lookup_advanced 806.324 -avg_nanosec_per_negative_lookup_advanced 617.424 -avg_nanosec_per_access 367.28 -iterator: avg_nanosec_per_kmer 19.8861 -avg_nanosec_per_positive_lookup 1150.02 -avg_nanosec_per_negative_lookup 678.759 -avg_nanosec_per_positive_lookup_advanced 1122.07 -avg_nanosec_per_negative_lookup_advanced 678.793 -avg_nanosec_per_access 617.36 -iterator: avg_nanosec_per_kmer 20.0176 diff --git a/benchmarks/results-22-08-25/k31/22-08-25.canon.build_log b/benchmarks/results-22-08-25/k31/22-08-25.canon.build_log deleted file mode 100644 index 01ee042..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.canon.build_log +++ /dev/null @@ -1,519 +0,0 @@ -k = 31, m = 19, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true 
-reading file '/home/giulio/sshash_datasets/cod.k31.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 27657185 bases, 24657215 kmers -read 200000 sequences, 54659411 bases, 48659441 kmers -read 300000 sequences, 82042065 bases, 73042095 kmers -read 400000 sequences, 109137740 bases, 97137770 kmers -read 500000 sequences, 135591611 bases, 120591641 kmers -read 600000 sequences, 162605989 bases, 144606019 kmers -read 700000 sequences, 189421730 bases, 168421760 kmers -read 800000 sequences, 214809276 bases, 190809306 kmers -read 900000 sequences, 240465564 bases, 213465594 kmers -read 1000000 sequences, 265968295 bases, 235968325 kmers -read 1100000 sequences, 291953235 bases, 258953265 kmers -read 1200000 sequences, 316684260 bases, 280684290 kmers -read 1300000 sequences, 341304215 bases, 302304245 kmers -read 1400000 sequences, 365519325 bases, 323519355 kmers -read 1500000 sequences, 388985606 bases, 343985636 kmers -read 1600000 sequences, 411705914 bases, 363705944 kmers -read 1700000 sequences, 434632801 bases, 383632831 kmers -read 1800000 sequences, 456140575 bases, 402140605 kmers -read 1900000 sequences, 477396896 bases, 420396926 kmers -read 2000000 sequences, 498761432 bases, 438761462 kmers -read 2100000 sequences, 518417062 bases, 455417092 kmers -read 2200000 sequences, 537930862 bases, 471930892 kmers -read 2300000 sequences, 556305688 bases, 487305718 kmers -read 2400000 sequences, 573938040 bases, 501938070 kmers -=== step 1.1: 'encoding_input' 2.8923 [sec] (5.75621 [ns/kmer]) -read 2406269 sequences, 574653270 bases, 502465200 kmers -num_kmers 502465200 -cost: 2.0 + 0.287336 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.3.bin'... 
-saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 3.93549 [sec] (7.83236 [ns/kmer]) -=== step 1: 'parse_file' 6.82794 [sec] (13.5889 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.bin' -num_super_kmers = 50000000 -num_minimizers = 79497428 -num_minimizer_positions = 88981380 -num_super_kmers = 92581339 -building minimizers MPHF with 8 threads and 27 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 10.2729 [sec] (20.445 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 4.93769 [sec] (9.82693 [ns/kmer]) -bits_per_offset = ceil(log2(574653302)) = 30 -reading from 'tmp_dir/sshash.tmp.run_1755868267671453525.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868289780971210.bucket_pairs.0.bin'... -num_singletons 76295418/79497428 (95.9722%) -building: 0.225122 [sec] -computing minimizers offsets: 0.220248 [sec] -encoding: 0.023241 [sec] -=== step 3: 'build_sparse_index' 0.915665 [sec] (1.82235 [ns/kmer]) -max_bucket_size 257522 -log2_max_bucket_size 18 -num_buckets_in_skew_index 7311/79497428 (0.00919652%) -num_partitions 7 -computing sizes of partitions... 
- partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 2248357 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 1370324 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 1150821 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 912964 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 540380 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 486826 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 257522: 1682941 -num_kmers_in_skew_index 8392613 (1.67029%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 2248357 - building MPHF with 8 threads and 1 partitions (avg. partition size = 2248357)... - built mphs[0] for 2248357 kmers; bits/key = 2.34415 - built positions[0] for 2248357 kmers; bits/key = 7.00017 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 1370324 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1370324)... - built mphs[1] for 1370324 kmers; bits/key = 2.3762 - built positions[1] for 1370324 kmers; bits/key = 8.00026 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1150821 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1150821)... - built mphs[2] for 1150821 kmers; bits/key = 2.39401 - built positions[2] for 1150821 kmers; bits/key = 9.00032 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 912964 - building MPHF with 8 threads and 1 partitions (avg. partition size = 912964)... - built mphs[3] for 912964 kmers; bits/key = 2.37507 - built positions[3] for 912964 kmers; bits/key = 10.0004 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 540380 - building MPHF with 8 threads and 1 partitions (avg. partition size = 540380)... 
- built mphs[4] for 540380 kmers; bits/key = 2.42893 - built positions[4] for 540380 kmers; bits/key = 11.0006 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 486826 - building MPHF with 8 threads and 1 partitions (avg. partition size = 486826)... - built mphs[5] for 486826 kmers; bits/key = 2.44339 - built positions[5] for 486826 kmers; bits/key = 12.0007 - lower = 4096; upper = 257522; num_bits_per_pos = 18; num_kmers_in_partition = 1682941 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1682941)... - built mphs[6] for 1682941 kmers; bits/key = 2.36554 - built positions[6] for 1682941 kmers; bits/key = 18.0002 -num_bits_for_skew_index 108203008(0.215344 [bits/kmer]) -=== step 4: 'build_skew_index' 10.9446 [sec] (21.7818 [ns/kmer]) -=== total_time 33.8988 [sec] (67.465 [ns/kmer]) -total index size: 537058306 [B] -- 537.058 [MB] -SPACE BREAKDOWN: - minimizers: 0.414084 [bits/kmer] (2.61723 [bits/key]) -- 4.84265% - pieces: 0.0549683 [bits/kmer] -- 0.642846% - sizes: 0.266349 [bits/kmer] -- 3.11491% - offsets: 5.31269 [bits/kmer] -- 62.1311% - strings: 2.28734 [bits/kmer] -- 26.75% - skew_index: 0.215344 [bits/kmer] -- 2.51842% - weights: 2.92956e-06 [bits/kmer] -- 3.42607e-05% - -------------- - total: 8.55077 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 95.9722% -buckets with 2 minimizer positions = 2.50518% -buckets with 3 minimizer positions = 0.677682% -buckets with 4 minimizer positions = 0.290043% -buckets with 5 minimizer positions = 0.153082% -buckets with 6 minimizer positions = 0.0915476% -buckets with 7 minimizer positions = 0.0610259% -buckets with 8 minimizer positions = 0.0427511% -buckets with 9 minimizer positions = 0.0309167% -buckets with 10 minimizer positions = 0.0241329% -buckets with 11 minimizer positions = 0.0189478% -buckets with 12 minimizer positions = 0.0152961% -buckets with 13 minimizer positions = 0.0126696% -buckets with 14 minimizer 
positions = 0.0103953% -buckets with 15 minimizer positions = 0.0089223% -buckets with 16 minimizer positions = 0.00736124% -max_bucket_size 257522 -2025-08-22 15:11:41: saving data structure to disk... -2025-08-22 15:11:41: DONE -k = 31, m = 19, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/kestrel.k31.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 158452704 bases, 155452734 kmers -read 200000 sequences, 318155284 bases, 312155314 kmers -read 300000 sequences, 484429755 bases, 475429785 kmers -read 400000 sequences, 657902795 bases, 645902825 kmers -read 500000 sequences, 832407154 bases, 817407184 kmers -read 600000 sequences, 1004979671 bases, 986979701 kmers -=== step 1.1: 'encoding_input' 5.58902 [sec] (4.85833 [ns/kmer]) -read 682245 sequences, 1170866555 bases, 1150399205 kmers -num_kmers 1150399205 -cost: 2.0 + 0.035583 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.6.bin'... 
-=== step 1.2: 'computing_minimizers_tuples' 8.64104 [sec] (7.51134 [ns/kmer]) -=== step 1: 'parse_file' 14.2302 [sec] (12.3697 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_super_kmers = 200000000 -num_minimizers = 194060478 -num_minimizer_positions = 200550710 -num_super_kmers = 209203487 -building minimizers MPHF with 8 threads and 65 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 23.852 [sec] (20.7336 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 17.9844 [sec] (15.6332 [ns/kmer]) -bits_per_offset = ceil(log2(1170866587)) = 31 -reading from 'tmp_dir/sshash.tmp.run_1755868301923422455.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868358305024462.bucket_pairs.0.bin'... -num_singletons 190204211/194060478 (98.0129%) -building: 0.522711 [sec] -computing minimizers offsets: 0.431731 [sec] -encoding: 0.006292 [sec] -=== step 3: 'build_sparse_index' 2.05202 [sec] (1.78374 [ns/kmer]) -max_bucket_size 4596 -log2_max_bucket_size 13 -num_buckets_in_skew_index 3610/194060478 (0.00186024%) -num_partitions 7 -computing sizes of partitions... 
- partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 1215283 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 758996 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 448292 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 299144 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 146091 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 69238 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 4596: 20320 -num_kmers_in_skew_index 2957364 (0.257073%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1215283 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1215283)... - built mphs[0] for 1215283 kmers; bits/key = 2.39053 - built positions[0] for 1215283 kmers; bits/key = 7.00029 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 758996 - building MPHF with 8 threads and 1 partitions (avg. partition size = 758996)... - built mphs[1] for 758996 kmers; bits/key = 2.39428 - built positions[1] for 758996 kmers; bits/key = 8.00046 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 448292 - building MPHF with 8 threads and 1 partitions (avg. partition size = 448292)... - built mphs[2] for 448292 kmers; bits/key = 2.36442 - built positions[2] for 448292 kmers; bits/key = 9.00085 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 299144 - building MPHF with 8 threads and 1 partitions (avg. partition size = 299144)... - built mphs[3] for 299144 kmers; bits/key = 2.42404 - built positions[3] for 299144 kmers; bits/key = 10.0012 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 146091 - building MPHF with 8 threads and 1 partitions (avg. partition size = 146091)... 
- built mphs[4] for 146091 kmers; bits/key = 2.54658 - built positions[4] for 146091 kmers; bits/key = 11.0025 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 69238 - building MPHF with 8 threads and 1 partitions (avg. partition size = 69238)... - built mphs[5] for 69238 kmers; bits/key = 2.79315 - built positions[5] for 69238 kmers; bits/key = 12.0054 - lower = 4096; upper = 4596; num_bits_per_pos = 13; num_kmers_in_partition = 20320 - building MPHF with 8 threads and 1 partitions (avg. partition size = 20320)... - built mphs[6] for 20320 kmers; bits/key = 3.1189 - built positions[6] for 20320 kmers; bits/key = 13.0173 -num_bits_for_skew_index 31446064(0.0273349 [bits/kmer]) -=== step 4: 'build_skew_index' 4.9767 [sec] (4.32606 [ns/kmer]) -=== total_time 63.0952 [sec] (54.8464 [ns/kmer]) -total index size: 1177377532 [B] -- 1177.38 [MB] -SPACE BREAKDOWN: - minimizers: 0.442785 [bits/kmer] (2.62485 [bits/key]) -- 5.40799% - pieces: 0.00841122 [bits/kmer] -- 0.102731% - sizes: 0.26922 [bits/kmer] -- 3.28814% - offsets: 5.40427 [bits/kmer] -- 66.0055% - strings: 2.03558 [bits/kmer] -- 24.8618% - skew_index: 0.0273349 [bits/kmer] -- 0.333857% - weights: 1.27956e-06 [bits/kmer] -- 1.5628e-05% - -------------- - total: 8.18761 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 98.0129% -buckets with 2 minimizer positions = 1.66443% -buckets with 3 minimizer positions = 0.16605% -buckets with 4 minimizer positions = 0.0545351% -buckets with 5 minimizer positions = 0.0276455% -buckets with 6 minimizer positions = 0.0167922% -buckets with 7 minimizer positions = 0.0113021% -buckets with 8 minimizer positions = 0.00793722% -buckets with 9 minimizer positions = 0.00581262% -buckets with 10 minimizer positions = 0.00442542% -buckets with 11 minimizer positions = 0.0035762% -buckets with 12 minimizer positions = 0.00283056% -buckets with 13 minimizer positions = 0.00231526% -buckets with 14 minimizer positions = 
0.00193806% -buckets with 15 minimizer positions = 0.00159744% -buckets with 16 minimizer positions = 0.00142327% -max_bucket_size 4596 -2025-08-22 15:12:45: saving data structure to disk... -2025-08-22 15:12:45: DONE -k = 31, m = 20, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/human.k31.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 21756494 bases, 18756524 kmers -read 200000 sequences, 43415082 bases, 37415112 kmers -read 300000 sequences, 65780160 bases, 56780190 kmers -read 400000 sequences, 87471486 bases, 75471516 kmers -read 500000 sequences, 109226866 bases, 94226896 kmers -read 600000 sequences, 131648019 bases, 113648049 kmers -read 700000 sequences, 153316395 bases, 132316425 kmers -read 800000 sequences, 176702606 bases, 152702636 kmers -read 900000 sequences, 198938131 bases, 171938161 kmers -read 1000000 sequences, 220949217 bases, 190949247 kmers -read 1100000 sequences, 244049472 bases, 211049502 kmers -read 1200000 sequences, 267074826 bases, 231074856 kmers -read 1300000 sequences, 288917293 bases, 249917323 kmers -read 1400000 sequences, 311411250 bases, 269411280 kmers -read 1500000 sequences, 333749577 bases, 288749607 kmers -read 1600000 sequences, 356425695 bases, 308425725 kmers -read 1700000 sequences, 378782863 bases, 327782893 kmers -read 1800000 sequences, 401743561 bases, 347743591 kmers -read 1900000 sequences, 424135183 bases, 367135213 kmers -read 2000000 sequences, 446879049 bases, 386879079 kmers -read 2100000 sequences, 468734269 bases, 405734299 kmers -read 2200000 sequences, 490603237 bases, 424603267 kmers -read 2300000 sequences, 513905163 bases, 444905193 kmers -read 2400000 sequences, 537439218 bases, 465439248 kmers -read 2500000 sequences, 560503353 bases, 485503383 kmers -read 2600000 sequences, 583973645 bases, 505973675 kmers -read 2700000 sequences, 606563596 bases, 525563626 kmers -read 2800000 
sequences, 629471143 bases, 545471173 kmers -read 2900000 sequences, 653036453 bases, 566036483 kmers -read 3000000 sequences, 676313831 bases, 586313861 kmers -read 3100000 sequences, 700155099 bases, 607155129 kmers -read 3200000 sequences, 723256428 bases, 627256458 kmers -read 3300000 sequences, 745736801 bases, 646736831 kmers -read 3400000 sequences, 768718458 bases, 666718488 kmers -read 3500000 sequences, 791774449 bases, 686774479 kmers -read 3600000 sequences, 814593816 bases, 706593846 kmers -read 3700000 sequences, 838379879 bases, 727379909 kmers -read 3800000 sequences, 860634933 bases, 746634963 kmers -read 3900000 sequences, 884047075 bases, 767047105 kmers -read 4000000 sequences, 907479686 bases, 787479716 kmers -read 4100000 sequences, 930543338 bases, 807543368 kmers -read 4200000 sequences, 953267462 bases, 827267492 kmers -read 4300000 sequences, 976927833 bases, 847927863 kmers -read 4400000 sequences, 1000738248 bases, 868738278 kmers -read 4500000 sequences, 1024531209 bases, 889531239 kmers -read 4600000 sequences, 1047717223 bases, 909717253 kmers -read 4700000 sequences, 1071027493 bases, 930027523 kmers -read 4800000 sequences, 1094084552 bases, 950084582 kmers -read 4900000 sequences, 1117956137 bases, 970956167 kmers -read 5000000 sequences, 1141125024 bases, 991125054 kmers -read 5100000 sequences, 1164319107 bases, 1011319137 kmers -read 5200000 sequences, 1187390563 bases, 1031390593 kmers -read 5300000 sequences, 1211053129 bases, 1052053159 kmers -read 5400000 sequences, 1234026672 bases, 1072026702 kmers -read 5500000 sequences, 1257295769 bases, 1092295799 kmers -read 5600000 sequences, 1279760698 bases, 1111760728 kmers -read 5700000 sequences, 1303188807 bases, 1132188837 kmers -read 5800000 sequences, 1326902250 bases, 1152902280 kmers -read 5900000 sequences, 1350165652 bases, 1173165682 kmers -read 6000000 sequences, 1373185888 bases, 1193185918 kmers -read 6100000 sequences, 1396446455 bases, 1213446485 kmers -read 
6200000 sequences, 1420887422 bases, 1234887452 kmers -read 6300000 sequences, 1444176084 bases, 1255176114 kmers -read 6400000 sequences, 1467761803 bases, 1275761833 kmers -read 6500000 sequences, 1491517469 bases, 1296517499 kmers -read 6600000 sequences, 1515086149 bases, 1317086179 kmers -read 6700000 sequences, 1537506202 bases, 1336506232 kmers -read 6800000 sequences, 1560946391 bases, 1356946421 kmers -read 6900000 sequences, 1583945488 bases, 1376945518 kmers -read 7000000 sequences, 1606825999 bases, 1396826029 kmers -read 7100000 sequences, 1630889570 bases, 1417889600 kmers -read 7200000 sequences, 1653502338 bases, 1437502368 kmers -read 7300000 sequences, 1676836074 bases, 1457836104 kmers -read 7400000 sequences, 1700581893 bases, 1478581923 kmers -read 7500000 sequences, 1723991216 bases, 1498991246 kmers -read 7600000 sequences, 1747267694 bases, 1519267724 kmers -read 7700000 sequences, 1769997128 bases, 1538997158 kmers -read 7800000 sequences, 1793090725 bases, 1559090755 kmers -read 7900000 sequences, 1816572169 bases, 1579572199 kmers -read 8000000 sequences, 1839912405 bases, 1599912435 kmers -read 8100000 sequences, 1863462142 bases, 1620462172 kmers -read 8200000 sequences, 1887689183 bases, 1641689213 kmers -read 8300000 sequences, 1911245326 bases, 1662245356 kmers -read 8400000 sequences, 1934891774 bases, 1682891804 kmers -read 8500000 sequences, 1958265360 bases, 1703265390 kmers -read 8600000 sequences, 1981114530 bases, 1723114560 kmers -read 8700000 sequences, 2005160537 bases, 1744160567 kmers -read 8800000 sequences, 2028270585 bases, 1764270615 kmers -read 8900000 sequences, 2051383223 bases, 1784383253 kmers -read 9000000 sequences, 2074474142 bases, 1804474172 kmers -read 9100000 sequences, 2096460797 bases, 1823460827 kmers -read 9200000 sequences, 2119508963 bases, 1843508993 kmers -read 9300000 sequences, 2142674047 bases, 1863674077 kmers -read 9400000 sequences, 2164744513 bases, 1882744543 kmers -read 9500000 sequences, 
2187346618 bases, 1902346648 kmers -read 9600000 sequences, 2209977347 bases, 1921977377 kmers -read 9700000 sequences, 2232411621 bases, 1941411651 kmers -read 9800000 sequences, 2254902856 bases, 1960902886 kmers -read 9900000 sequences, 2277498326 bases, 1980498356 kmers -read 10000000 sequences, 2299499706 bases, 1999499736 kmers -read 10100000 sequences, 2321026083 bases, 2018026113 kmers -read 10200000 sequences, 2343732502 bases, 2037732532 kmers -read 10300000 sequences, 2364673146 bases, 2055673176 kmers -read 10400000 sequences, 2386756547 bases, 2074756577 kmers -read 10500000 sequences, 2407972829 bases, 2092972859 kmers -read 10600000 sequences, 2429720496 bases, 2111720526 kmers -read 10700000 sequences, 2450999850 bases, 2129999880 kmers -read 10800000 sequences, 2471991237 bases, 2147991267 kmers -read 10900000 sequences, 2492905716 bases, 2165905746 kmers -read 11000000 sequences, 2514209537 bases, 2184209567 kmers -read 11100000 sequences, 2535680432 bases, 2202680462 kmers -read 11200000 sequences, 2556527106 bases, 2220527136 kmers -read 11300000 sequences, 2577149683 bases, 2238149713 kmers -read 11400000 sequences, 2597185570 bases, 2255185600 kmers -read 11500000 sequences, 2617632290 bases, 2272632320 kmers -read 11600000 sequences, 2638223488 bases, 2290223518 kmers -read 11700000 sequences, 2658091095 bases, 2307091125 kmers -read 11800000 sequences, 2676727586 bases, 2322727616 kmers -read 11900000 sequences, 2695970855 bases, 2338970885 kmers -read 12000000 sequences, 2714887902 bases, 2354887932 kmers -read 12100000 sequences, 2733216523 bases, 2370216553 kmers -read 12200000 sequences, 2751654575 bases, 2385654605 kmers -read 12300000 sequences, 2769824681 bases, 2400824711 kmers -read 12400000 sequences, 2788136547 bases, 2416136577 kmers -read 12500000 sequences, 2806066941 bases, 2431066971 kmers -read 12600000 sequences, 2823691733 bases, 2445691763 kmers -read 12700000 sequences, 2841752082 bases, 2460752112 kmers -read 12800000 
sequences, 2859305620 bases, 2475305650 kmers -read 12900000 sequences, 2876642001 bases, 2489642031 kmers -read 13000000 sequences, 2893631233 bases, 2503631263 kmers -=== step 1.1: 'encoding_input' 14.6222 [sec] (5.83616 [ns/kmer]) -read 13014214 sequences, 2895872181 bases, 2505445761 kmers -num_kmers 2505445761 -cost: 2.0 + 0.311662 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 18.7553 [sec] (7.48581 [ns/kmer]) -=== step 1: 'parse_file' 33.3776 [sec] (13.322 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_super_kmers = 200000000 -num_super_kmers = 250000000 -num_super_kmers = 300000000 -num_super_kmers = 350000000 -num_super_kmers = 400000000 -num_super_kmers = 450000000 -num_minimizers = 418360623 -num_minimizer_positions = 475672736 -num_super_kmers = 494602342 -building minimizers MPHF with 8 threads and 140 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 56.7126 [sec] (22.6357 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.1.bin'... 
- == files to merge = 2 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_super_kmers = 200000000 -num_super_kmers = 250000000 -num_super_kmers = 300000000 -num_super_kmers = 350000000 -num_super_kmers = 400000000 -num_super_kmers = 450000000 -num_minimizers = 418360623 -num_minimizer_positions = 475672736 -num_super_kmers = 494602342 -=== step 2.1: 're-sorting minimizers tuples' 56.9092 [sec] (22.7142 [ns/kmer]) -bits_per_offset = ceil(log2(2895872213)) = 32 -reading from 'tmp_dir/sshash.tmp.run_1755868365759028075.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868513137485517.bucket_pairs.0.bin'... -num_singletons 401627906/418360623 (96.0004%) -building: 1.19106 [sec] -computing minimizers offsets: 0.895773 [sec] -encoding: 0.119168 [sec] -=== step 3: 'build_sparse_index' 4.60948 [sec] (1.83979 [ns/kmer]) -max_bucket_size 20140 -log2_max_bucket_size 15 -num_buckets_in_skew_index 75633/418360623 (0.0180784%) -num_partitions 7 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 18467007 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 14212219 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 10399161 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 7124477 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 4413430 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 3140606 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 20140: 2808524 -num_kmers_in_skew_index 60565424 (2.41735%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 18467007 - building MPHF with 8 threads and 7 partitions (avg. partition size = 3000000)... 
- built mphs[0] for 18467007 kmers; bits/key = 2.3997 - built positions[0] for 18467007 kmers; bits/key = 7.00002 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 14212219 - building MPHF with 8 threads and 5 partitions (avg. partition size = 3000000)... - built mphs[1] for 14212219 kmers; bits/key = 2.39549 - built positions[1] for 14212219 kmers; bits/key = 8.00003 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 10399161 - building MPHF with 8 threads and 4 partitions (avg. partition size = 3000000)... - built mphs[2] for 10399161 kmers; bits/key = 2.42642 - built positions[2] for 10399161 kmers; bits/key = 9.00004 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 7124477 - building MPHF with 8 threads and 3 partitions (avg. partition size = 3000000)... - built mphs[3] for 7124477 kmers; bits/key = 2.55041 - built positions[3] for 7124477 kmers; bits/key = 10 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 4413430 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[4] for 4413430 kmers; bits/key = 2.57025 - built positions[4] for 4413430 kmers; bits/key = 11.0001 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 3140606 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[5] for 3140606 kmers; bits/key = 3.07188 - built positions[5] for 3140606 kmers; bits/key = 12.0001 - lower = 4096; upper = 20140; num_bits_per_pos = 15; num_kmers_in_partition = 2808524 - building MPHF with 8 threads and 1 partitions (avg. partition size = 2808524)... 
- built mphs[6] for 2808524 kmers; bits/key = 2.33795 - built positions[6] for 2808524 kmers; bits/key = 15.0001 -num_bits_for_skew_index 685490320(0.2736 [bits/kmer]) -=== step 4: 'build_skew_index' 21.0846 [sec] (8.41553 [ns/kmer]) -=== total_time 172.694 [sec] (68.9273 [ns/kmer]) -total index size: 2956652772 [B] -- 2956.65 [MB] -SPACE BREAKDOWN: - minimizers: 0.437715 [bits/kmer] (2.62135 [bits/key]) -- 4.63645% - pieces: 0.0585865 [bits/kmer] -- 0.620573% - sizes: 0.283783 [bits/kmer] -- 3.00594% - offsets: 6.07538 [bits/kmer] -- 64.3529% - strings: 2.31166 [bits/kmer] -- 24.4861% - skew_index: 0.2736 [bits/kmer] -- 2.89808% - weights: 5.8752e-07 [bits/kmer] -- 6.22325e-06% - -------------- - total: 9.44072 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 96.0004% -buckets with 2 minimizer positions = 2.56942% -buckets with 3 minimizer positions = 0.58% -buckets with 4 minimizer positions = 0.252326% -buckets with 5 minimizer positions = 0.141037% -buckets with 6 minimizer positions = 0.0897776% -buckets with 7 minimizer positions = 0.0621464% -buckets with 8 minimizer positions = 0.0453446% -buckets with 9 minimizer positions = 0.0348078% -buckets with 10 minimizer positions = 0.0270769% -buckets with 11 minimizer positions = 0.0216744% -buckets with 12 minimizer positions = 0.0178129% -buckets with 13 minimizer positions = 0.0150052% -buckets with 14 minimizer positions = 0.0125844% -buckets with 15 minimizer positions = 0.0107302% -buckets with 16 minimizer positions = 0.00926139% -max_bucket_size 20140 -2025-08-22 15:15:38: saving data structure to disk... 
-2025-08-22 15:15:40: DONE diff --git a/benchmarks/results-22-08-25/k31/22-08-25.canon.high-hit.streaming_query_log b/benchmarks/results-22-08-25/k31/22-08-25.canon.high-hit.streaming_query_log deleted file mode 100644 index def2335..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.canon.high-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-22 15:27:26: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-22 15:27:39: DONE -==== query report: -num_kmers = 163287360 -num_positive_kmers = 132860997 (81.3664%) -num_negative_kmers = 30426363 (18.6336%) -num_invalid_kmers = 0 (0%) -num_searches = 7886675/132860997 (5.93603%) -num_extensions = 124974322/132860997 (94.064%) -elapsed = 12803.2 millisec / 12.8032 sec / 0.213386 min / 78.4089 ns/kmer -2025-08-22 15:27:40: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-22 15:28:51: DONE -==== query report: -num_kmers = 695737535 -num_positive_kmers = 525542891 (75.5375%) -num_negative_kmers = 170183654 (24.4609%) -num_invalid_kmers = 10990 (0.00157962%) -num_searches = 13101335/525542891 (2.49291%) -num_extensions = 512441556/525542891 (97.5071%) -elapsed = 71394.1 millisec / 71.3941 sec / 1.1899 min / 102.616 ns/kmer -2025-08-22 15:28:52: performing queries from file '/home/giulio/sshash_queries/SRR5833294.fastq.gz'... 
-2025-08-22 15:32:45: DONE -==== query report: -num_kmers = 1569974986 -num_positive_kmers = 1437870528 (91.5856%) -num_negative_kmers = 131075447 (8.34889%) -num_invalid_kmers = 1029011 (0.0655431%) -num_searches = 110228289/1437870528 (7.66608%) -num_extensions = 1327642239/1437870528 (92.3339%) -elapsed = 233067 millisec / 233.067 sec / 3.88445 min / 148.453 ns/kmer diff --git a/benchmarks/results-22-08-25/k31/22-08-25.canon.low-hit.streaming_query_log b/benchmarks/results-22-08-25/k31/22-08-25.canon.low-hit.streaming_query_log deleted file mode 100644 index 5c8d9b2..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.canon.low-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-22 15:36:57: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-22 15:38:39: DONE -==== query report: -num_kmers = 695737535 -num_positive_kmers = 4754204 (0.683333%) -num_negative_kmers = 690972341 (99.3151%) -num_invalid_kmers = 10990 (0.00157962%) -num_searches = 3613872/4754204 (76.0142%) -num_extensions = 1140332/4754204 (23.9858%) -elapsed = 102225 millisec / 102.225 sec / 1.70375 min / 146.93 ns/kmer -2025-08-22 15:38:40: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-22 15:38:51: DONE -==== query report: -num_kmers = 163287360 -num_positive_kmers = 790414 (0.484063%) -num_negative_kmers = 162496946 (99.5159%) -num_invalid_kmers = 0 (0%) -num_searches = 499575/790414 (63.2042%) -num_extensions = 290839/790414 (36.7958%) -elapsed = 10896.7 millisec / 10.8967 sec / 0.181611 min / 66.7331 ns/kmer -2025-08-22 15:38:52: performing queries from file '/home/giulio/sshash_queries/SRR5901135_1.fastq.gz'... 
-2025-08-22 15:39:49: DONE -==== query report: -num_kmers = 395433242 -num_positive_kmers = 1134 (0.000286774%) -num_negative_kmers = 395408021 (99.9936%) -num_invalid_kmers = 24087 (0.00609129%) -num_searches = 895/1134 (78.9242%) -num_extensions = 239/1134 (21.0758%) -elapsed = 57071.6 millisec / 57.0716 sec / 0.951193 min / 144.327 ns/kmer diff --git a/benchmarks/results-22-08-25/k31/22-08-25.regular.bench_log b/benchmarks/results-22-08-25/k31/22-08-25.regular.bench_log deleted file mode 100644 index 3738874..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.regular.bench_log +++ /dev/null @@ -1,18 +0,0 @@ -avg_nanosec_per_positive_lookup 1025.96 -avg_nanosec_per_negative_lookup 967.926 -avg_nanosec_per_positive_lookup_advanced 1026.55 -avg_nanosec_per_negative_lookup_advanced 982.257 -avg_nanosec_per_access 433.153 -iterator: avg_nanosec_per_kmer 19.9873 -avg_nanosec_per_positive_lookup 1029.57 -avg_nanosec_per_negative_lookup 1113.39 -avg_nanosec_per_positive_lookup_advanced 1025.88 -avg_nanosec_per_negative_lookup_advanced 1113.5 -avg_nanosec_per_access 368.153 -iterator: avg_nanosec_per_kmer 19.7098 -avg_nanosec_per_positive_lookup 1433.59 -avg_nanosec_per_negative_lookup 1237.58 -avg_nanosec_per_positive_lookup_advanced 1431.06 -avg_nanosec_per_negative_lookup_advanced 1237.18 -avg_nanosec_per_access 615.355 -iterator: avg_nanosec_per_kmer 20.0418 diff --git a/benchmarks/results-22-08-25/k31/22-08-25.regular.build_log b/benchmarks/results-22-08-25/k31/22-08-25.regular.build_log deleted file mode 100644 index b236841..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.regular.build_log +++ /dev/null @@ -1,497 +0,0 @@ -k = 31, m = 20, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/cod.k31.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 27657185 bases, 24657215 kmers -read 200000 sequences, 54659411 bases, 48659441 kmers -read 300000 sequences, 82042065 bases, 73042095 kmers -read 400000 sequences, 109137740 bases, 97137770 kmers -read 500000 sequences, 135591611 bases, 120591641 kmers -read 600000 sequences, 162605989 bases, 144606019 kmers -read 700000 sequences, 189421730 bases, 168421760 kmers -read 800000 sequences, 214809276 bases, 190809306 kmers -read 900000 sequences, 240465564 bases, 213465594 kmers -read 1000000 sequences, 265968295 bases, 235968325 kmers -read 1100000 sequences, 291953235 bases, 258953265 kmers -read 1200000 sequences, 316684260 bases, 280684290 kmers -read 1300000 sequences, 341304215 bases, 302304245 kmers -read 1400000 sequences, 365519325 bases, 323519355 kmers -read 1500000 sequences, 388985606 bases, 343985636 kmers -read 1600000 sequences, 411705914 bases, 363705944 kmers -read 1700000 sequences, 434632801 bases, 383632831 kmers -read 1800000 sequences, 456140575 bases, 402140605 kmers -read 1900000 sequences, 477396896 bases, 420396926 kmers -read 2000000 sequences, 498761432 bases, 438761462 kmers -read 2100000 sequences, 518417062 bases, 455417092 kmers -read 2200000 sequences, 537930862 bases, 471930892 kmers -read 2300000 sequences, 556305688 bases, 487305718 kmers -read 2400000 sequences, 573938040 bases, 501938070 kmers -=== step 1.1: 'encoding_input' 2.89863 [sec] (5.76882 [ns/kmer]) -read 2406269 sequences, 574653270 bases, 502465200 kmers -num_kmers 502465200 -cost: 2.0 + 0.287336 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.4.bin'... 
-saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 3.1134 [sec] (6.19624 [ns/kmer]) -=== step 1: 'parse_file' 6.01219 [sec] (11.9654 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.bin' -num_super_kmers = 50000000 -num_minimizers = 72482370 -num_minimizer_positions = 78855216 -num_super_kmers = 78855216 -building minimizers MPHF with 8 threads and 25 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 9.30152 [sec] (18.5118 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 4.88481 [sec] (9.72169 [ns/kmer]) -bits_per_offset = ceil(log2(574653302)) = 30 -reading from 'tmp_dir/sshash.tmp.run_1755868060474121895.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868080735611640.bucket_pairs.0.bin'... -num_singletons 70140084/72482370 (96.7685%) -building: 0.201497 [sec] -computing minimizers offsets: 0.211199 [sec] -encoding: 0.022981 [sec] -=== step 3: 'build_sparse_index' 0.781283 [sec] (1.5549 [ns/kmer]) -max_bucket_size 121065 -log2_max_bucket_size 17 -num_buckets_in_skew_index 4443/72482370 (0.00612977%) -num_partitions 7 -computing sizes of partitions... 
- partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 1230971 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 971702 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 653362 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 485695 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 456634 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 396335 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 121065: 729292 -num_kmers_in_skew_index 4923991 (0.979967%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 1230971 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1230971)... - built mphs[0] for 1230971 kmers; bits/key = 2.38542 - built positions[0] for 1230971 kmers; bits/key = 7.00029 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 971702 - building MPHF with 8 threads and 1 partitions (avg. partition size = 971702)... - built mphs[1] for 971702 kmers; bits/key = 2.37213 - built positions[1] for 971702 kmers; bits/key = 8.00035 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 653362 - building MPHF with 8 threads and 1 partitions (avg. partition size = 653362)... - built mphs[2] for 653362 kmers; bits/key = 2.41116 - built positions[2] for 653362 kmers; bits/key = 9.00058 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 485695 - building MPHF with 8 threads and 1 partitions (avg. partition size = 485695)... - built mphs[3] for 485695 kmers; bits/key = 2.46163 - built positions[3] for 485695 kmers; bits/key = 10.0007 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 456634 - building MPHF with 8 threads and 1 partitions (avg. partition size = 456634)... 
- built mphs[4] for 456634 kmers; bits/key = 2.36636 - built positions[4] for 456634 kmers; bits/key = 11.0007 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 396335 - building MPHF with 8 threads and 1 partitions (avg. partition size = 396335)... - built mphs[5] for 396335 kmers; bits/key = 2.39147 - built positions[5] for 396335 kmers; bits/key = 12.0008 - lower = 4096; upper = 121065; num_bits_per_pos = 17; num_kmers_in_partition = 729292 - building MPHF with 8 threads and 1 partitions (avg. partition size = 729292)... - built mphs[6] for 729292 kmers; bits/key = 2.40424 - built positions[6] for 729292 kmers; bits/key = 17.0005 -num_bits_for_skew_index 61101328(0.121603 [bits/kmer]) -=== step 4: 'build_skew_index' 6.61853 [sec] (13.1721 [ns/kmer]) -=== total_time 27.5983 [sec] (54.9259 [ns/kmer]) -total index size: 489355410 [B] -- 489.355 [MB] -SPACE BREAKDOWN: - minimizers: 0.381049 [bits/kmer] (2.64152 [bits/key]) -- 4.89072% - pieces: 0.0549683 [bits/kmer] -- 0.705511% - sizes: 0.238212 [bits/kmer] -- 3.05742% - offsets: 4.7081 [bits/kmer] -- 60.4279% - strings: 2.28734 [bits/kmer] -- 29.3577% - skew_index: 0.121603 [bits/kmer] -- 1.56076% - weights: 2.92956e-06 [bits/kmer] -- 3.76005e-05% - -------------- - total: 7.79127 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 96.7685% -buckets with 2 minimizer positions = 2.08482% -buckets with 3 minimizer positions = 0.527127% -buckets with 4 minimizer positions = 0.213044% -buckets with 5 minimizer positions = 0.111486% -buckets with 6 minimizer positions = 0.0671115% -buckets with 7 minimizer positions = 0.0451641% -buckets with 8 minimizer positions = 0.0320781% -buckets with 9 minimizer positions = 0.0239672% -buckets with 10 minimizer positions = 0.0183534% -buckets with 11 minimizer positions = 0.0144821% -buckets with 12 minimizer positions = 0.0117215% -buckets with 13 minimizer positions = 0.00959682% -buckets with 14 minimizer positions 
= 0.00787226% -buckets with 15 minimizer positions = 0.006683% -buckets with 16 minimizer positions = 0.00565379% -max_bucket_size 121065 -2025-08-22 15:08:08: saving data structure to disk... -2025-08-22 15:08:08: DONE -k = 31, m = 20, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/kestrel.k31.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 158452704 bases, 155452734 kmers -read 200000 sequences, 318155284 bases, 312155314 kmers -read 300000 sequences, 484429755 bases, 475429785 kmers -read 400000 sequences, 657902795 bases, 645902825 kmers -read 500000 sequences, 832407154 bases, 817407184 kmers -read 600000 sequences, 1004979671 bases, 986979701 kmers -=== step 1.1: 'encoding_input' 5.61036 [sec] (4.87688 [ns/kmer]) -read 682245 sequences, 1170866555 bases, 1150399205 kmers -num_kmers 1150399205 -cost: 2.0 + 0.035583 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.6.bin'... 
-=== step 1.2: 'computing_minimizers_tuples' 6.5896 [sec] (5.7281 [ns/kmer]) -=== step 1: 'parse_file' 12.2001 [sec] (10.6051 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_minimizers = 173450846 -num_minimizer_positions = 176257872 -num_super_kmers = 176257872 -building minimizers MPHF with 8 threads and 58 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 20.7783 [sec] (18.0618 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 9.57706 [sec] (8.32499 [ns/kmer]) -bits_per_offset = ceil(log2(1170866587)) = 31 -reading from 'tmp_dir/sshash.tmp.run_1755868088398204210.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868131094619778.bucket_pairs.0.bin'... -num_singletons 172100966/173450846 (99.2218%) -building: 0.449571 [sec] -computing minimizers offsets: 0.343787 [sec] -encoding: 0.00637 [sec] -=== step 3: 'build_sparse_index' 1.43792 [sec] (1.24993 [ns/kmer]) -max_bucket_size 2826 -log2_max_bucket_size 12 -num_buckets_in_skew_index 1711/173450846 (0.000986447%) -num_partitions 6 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 580154 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 340341 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 185473 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 84383 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 46606 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 2826: 19860 -num_kmers_in_skew_index 1256817 (0.109251%) -building partitions... 
- lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 580154 - building MPHF with 8 threads and 1 partitions (avg. partition size = 580154)... - built mphs[0] for 580154 kmers; bits/key = 2.42711 - built positions[0] for 580154 kmers; bits/key = 7.00062 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 340341 - building MPHF with 8 threads and 1 partitions (avg. partition size = 340341)... - built mphs[1] for 340341 kmers; bits/key = 2.41466 - built positions[1] for 340341 kmers; bits/key = 8.00101 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 185473 - building MPHF with 8 threads and 1 partitions (avg. partition size = 185473)... - built mphs[2] for 185473 kmers; bits/key = 2.48222 - built positions[2] for 185473 kmers; bits/key = 9.00202 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 84383 - building MPHF with 8 threads and 1 partitions (avg. partition size = 84383)... - built mphs[3] for 84383 kmers; bits/key = 2.71619 - built positions[3] for 84383 kmers; bits/key = 10.0039 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 46606 - building MPHF with 8 threads and 1 partitions (avg. partition size = 46606)... - built mphs[4] for 46606 kmers; bits/key = 2.81543 - built positions[4] for 46606 kmers; bits/key = 11.0077 - lower = 2048; upper = 2826; num_bits_per_pos = 12; num_kmers_in_partition = 19860 - building MPHF with 8 threads and 1 partitions (avg. partition size = 19860)... 
- built mphs[5] for 19860 kmers; bits/key = 3.16455 - built positions[5] for 19860 kmers; bits/key = 12.0169 -num_bits_for_skew_index 13163728(0.0114427 [bits/kmer]) -=== step 4: 'build_skew_index' 2.54635 [sec] (2.21345 [ns/kmer]) -=== total_time 46.5397 [sec] (40.4553 [ns/kmer]) -total index size: 1069708868 [B] -- 1069.71 [MB] -SPACE BREAKDOWN: - minimizers: 0.395755 [bits/kmer] (2.62482 [bits/key]) -- 5.3201% - pieces: 0.00841122 [bits/kmer] -- 0.113071% - sizes: 0.238026 [bits/kmer] -- 3.19976% - offsets: 4.74965 [bits/kmer] -- 63.8491% - strings: 2.03558 [bits/kmer] -- 27.3641% - skew_index: 0.0114427 [bits/kmer] -- 0.153824% - weights: 1.27956e-06 [bits/kmer] -- 1.72009e-05% - -------------- - total: 7.43887 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 99.2218% -buckets with 2 minimizer positions = 0.587822% -buckets with 3 minimizer positions = 0.0855418% -buckets with 4 minimizer positions = 0.0349903% -buckets with 5 minimizer positions = 0.0188924% -buckets with 6 minimizer positions = 0.0115975% -buckets with 7 minimizer positions = 0.0076944% -buckets with 8 minimizer positions = 0.00552548% -buckets with 9 minimizer positions = 0.00401266% -buckets with 10 minimizer positions = 0.00314556% -buckets with 11 minimizer positions = 0.00248082% -buckets with 12 minimizer positions = 0.002049% -buckets with 13 minimizer positions = 0.0016385% -buckets with 14 minimizer positions = 0.00134851% -buckets with 15 minimizer positions = 0.00113923% -buckets with 16 minimizer positions = 0.000967998% -max_bucket_size 2826 -2025-08-22 15:08:55: saving data structure to disk... -2025-08-22 15:08:55: DONE -k = 31, m = 21, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/human.k31.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 21756494 bases, 18756524 kmers -read 200000 sequences, 43415082 bases, 37415112 kmers -read 300000 sequences, 65780160 bases, 56780190 kmers -read 400000 sequences, 87471486 bases, 75471516 kmers -read 500000 sequences, 109226866 bases, 94226896 kmers -read 600000 sequences, 131648019 bases, 113648049 kmers -read 700000 sequences, 153316395 bases, 132316425 kmers -read 800000 sequences, 176702606 bases, 152702636 kmers -read 900000 sequences, 198938131 bases, 171938161 kmers -read 1000000 sequences, 220949217 bases, 190949247 kmers -read 1100000 sequences, 244049472 bases, 211049502 kmers -read 1200000 sequences, 267074826 bases, 231074856 kmers -read 1300000 sequences, 288917293 bases, 249917323 kmers -read 1400000 sequences, 311411250 bases, 269411280 kmers -read 1500000 sequences, 333749577 bases, 288749607 kmers -read 1600000 sequences, 356425695 bases, 308425725 kmers -read 1700000 sequences, 378782863 bases, 327782893 kmers -read 1800000 sequences, 401743561 bases, 347743591 kmers -read 1900000 sequences, 424135183 bases, 367135213 kmers -read 2000000 sequences, 446879049 bases, 386879079 kmers -read 2100000 sequences, 468734269 bases, 405734299 kmers -read 2200000 sequences, 490603237 bases, 424603267 kmers -read 2300000 sequences, 513905163 bases, 444905193 kmers -read 2400000 sequences, 537439218 bases, 465439248 kmers -read 2500000 sequences, 560503353 bases, 485503383 kmers -read 2600000 sequences, 583973645 bases, 505973675 kmers -read 2700000 sequences, 606563596 bases, 525563626 kmers -read 2800000 sequences, 629471143 bases, 545471173 kmers -read 2900000 sequences, 653036453 bases, 566036483 kmers -read 3000000 sequences, 676313831 bases, 586313861 kmers -read 3100000 sequences, 700155099 bases, 607155129 kmers -read 3200000 sequences, 723256428 bases, 627256458 kmers -read 3300000 sequences, 745736801 bases, 646736831 kmers -read 3400000 sequences, 768718458 bases, 666718488 kmers -read 3500000 sequences, 791774449 bases, 
686774479 kmers -read 3600000 sequences, 814593816 bases, 706593846 kmers -read 3700000 sequences, 838379879 bases, 727379909 kmers -read 3800000 sequences, 860634933 bases, 746634963 kmers -read 3900000 sequences, 884047075 bases, 767047105 kmers -read 4000000 sequences, 907479686 bases, 787479716 kmers -read 4100000 sequences, 930543338 bases, 807543368 kmers -read 4200000 sequences, 953267462 bases, 827267492 kmers -read 4300000 sequences, 976927833 bases, 847927863 kmers -read 4400000 sequences, 1000738248 bases, 868738278 kmers -read 4500000 sequences, 1024531209 bases, 889531239 kmers -read 4600000 sequences, 1047717223 bases, 909717253 kmers -read 4700000 sequences, 1071027493 bases, 930027523 kmers -read 4800000 sequences, 1094084552 bases, 950084582 kmers -read 4900000 sequences, 1117956137 bases, 970956167 kmers -read 5000000 sequences, 1141125024 bases, 991125054 kmers -read 5100000 sequences, 1164319107 bases, 1011319137 kmers -read 5200000 sequences, 1187390563 bases, 1031390593 kmers -read 5300000 sequences, 1211053129 bases, 1052053159 kmers -read 5400000 sequences, 1234026672 bases, 1072026702 kmers -read 5500000 sequences, 1257295769 bases, 1092295799 kmers -read 5600000 sequences, 1279760698 bases, 1111760728 kmers -read 5700000 sequences, 1303188807 bases, 1132188837 kmers -read 5800000 sequences, 1326902250 bases, 1152902280 kmers -read 5900000 sequences, 1350165652 bases, 1173165682 kmers -read 6000000 sequences, 1373185888 bases, 1193185918 kmers -read 6100000 sequences, 1396446455 bases, 1213446485 kmers -read 6200000 sequences, 1420887422 bases, 1234887452 kmers -read 6300000 sequences, 1444176084 bases, 1255176114 kmers -read 6400000 sequences, 1467761803 bases, 1275761833 kmers -read 6500000 sequences, 1491517469 bases, 1296517499 kmers -read 6600000 sequences, 1515086149 bases, 1317086179 kmers -read 6700000 sequences, 1537506202 bases, 1336506232 kmers -read 6800000 sequences, 1560946391 bases, 1356946421 kmers -read 6900000 sequences, 
1583945488 bases, 1376945518 kmers -read 7000000 sequences, 1606825999 bases, 1396826029 kmers -read 7100000 sequences, 1630889570 bases, 1417889600 kmers -read 7200000 sequences, 1653502338 bases, 1437502368 kmers -read 7300000 sequences, 1676836074 bases, 1457836104 kmers -read 7400000 sequences, 1700581893 bases, 1478581923 kmers -read 7500000 sequences, 1723991216 bases, 1498991246 kmers -read 7600000 sequences, 1747267694 bases, 1519267724 kmers -read 7700000 sequences, 1769997128 bases, 1538997158 kmers -read 7800000 sequences, 1793090725 bases, 1559090755 kmers -read 7900000 sequences, 1816572169 bases, 1579572199 kmers -read 8000000 sequences, 1839912405 bases, 1599912435 kmers -read 8100000 sequences, 1863462142 bases, 1620462172 kmers -read 8200000 sequences, 1887689183 bases, 1641689213 kmers -read 8300000 sequences, 1911245326 bases, 1662245356 kmers -read 8400000 sequences, 1934891774 bases, 1682891804 kmers -read 8500000 sequences, 1958265360 bases, 1703265390 kmers -read 8600000 sequences, 1981114530 bases, 1723114560 kmers -read 8700000 sequences, 2005160537 bases, 1744160567 kmers -read 8800000 sequences, 2028270585 bases, 1764270615 kmers -read 8900000 sequences, 2051383223 bases, 1784383253 kmers -read 9000000 sequences, 2074474142 bases, 1804474172 kmers -read 9100000 sequences, 2096460797 bases, 1823460827 kmers -read 9200000 sequences, 2119508963 bases, 1843508993 kmers -read 9300000 sequences, 2142674047 bases, 1863674077 kmers -read 9400000 sequences, 2164744513 bases, 1882744543 kmers -read 9500000 sequences, 2187346618 bases, 1902346648 kmers -read 9600000 sequences, 2209977347 bases, 1921977377 kmers -read 9700000 sequences, 2232411621 bases, 1941411651 kmers -read 9800000 sequences, 2254902856 bases, 1960902886 kmers -read 9900000 sequences, 2277498326 bases, 1980498356 kmers -read 10000000 sequences, 2299499706 bases, 1999499736 kmers -read 10100000 sequences, 2321026083 bases, 2018026113 kmers -read 10200000 sequences, 2343732502 
bases, 2037732532 kmers -read 10300000 sequences, 2364673146 bases, 2055673176 kmers -read 10400000 sequences, 2386756547 bases, 2074756577 kmers -read 10500000 sequences, 2407972829 bases, 2092972859 kmers -read 10600000 sequences, 2429720496 bases, 2111720526 kmers -read 10700000 sequences, 2450999850 bases, 2129999880 kmers -read 10800000 sequences, 2471991237 bases, 2147991267 kmers -read 10900000 sequences, 2492905716 bases, 2165905746 kmers -read 11000000 sequences, 2514209537 bases, 2184209567 kmers -read 11100000 sequences, 2535680432 bases, 2202680462 kmers -read 11200000 sequences, 2556527106 bases, 2220527136 kmers -read 11300000 sequences, 2577149683 bases, 2238149713 kmers -read 11400000 sequences, 2597185570 bases, 2255185600 kmers -read 11500000 sequences, 2617632290 bases, 2272632320 kmers -read 11600000 sequences, 2638223488 bases, 2290223518 kmers -read 11700000 sequences, 2658091095 bases, 2307091125 kmers -read 11800000 sequences, 2676727586 bases, 2322727616 kmers -read 11900000 sequences, 2695970855 bases, 2338970885 kmers -read 12000000 sequences, 2714887902 bases, 2354887932 kmers -read 12100000 sequences, 2733216523 bases, 2370216553 kmers -read 12200000 sequences, 2751654575 bases, 2385654605 kmers -read 12300000 sequences, 2769824681 bases, 2400824711 kmers -read 12400000 sequences, 2788136547 bases, 2416136577 kmers -read 12500000 sequences, 2806066941 bases, 2431066971 kmers -read 12600000 sequences, 2823691733 bases, 2445691763 kmers -read 12700000 sequences, 2841752082 bases, 2460752112 kmers -read 12800000 sequences, 2859305620 bases, 2475305650 kmers -read 12900000 sequences, 2876642001 bases, 2489642031 kmers -read 13000000 sequences, 2893631233 bases, 2503631263 kmers -=== step 1.1: 'encoding_input' 14.5875 [sec] (5.82232 [ns/kmer]) -read 13014214 sequences, 2895872181 bases, 2505445761 kmers -num_kmers 2505445761 -cost: 2.0 + 0.311662 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.0.bin'... 
-saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 14.7039 [sec] (5.86877 [ns/kmer]) -=== step 1: 'parse_file' 29.2915 [sec] (11.6911 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_super_kmers = 200000000 -num_super_kmers = 250000000 -num_super_kmers = 300000000 -num_super_kmers = 350000000 -num_super_kmers = 400000000 -num_minimizers = 389285242 -num_minimizer_positions = 428077581 -num_super_kmers = 428077581 -building minimizers MPHF with 8 threads and 130 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 52.7403 [sec] (21.0503 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 27.426 [sec] (10.9466 [ns/kmer]) -bits_per_offset = ceil(log2(2895872213)) = 32 -reading from 'tmp_dir/sshash.tmp.run_1755868135630793919.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755868245426962616.bucket_pairs.0.bin'... -num_singletons 378034182/389285242 (97.1098%) -building: 1.07647 [sec] -computing minimizers offsets: 0.764063 [sec] -encoding: 0.119168 [sec] -=== step 3: 'build_sparse_index' 3.7931 [sec] (1.51394 [ns/kmer]) -max_bucket_size 25155 -log2_max_bucket_size 15 -num_buckets_in_skew_index 48318/389285242 (0.012412%) -num_partitions 7 -computing sizes of partitions... 
- partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 12097707 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 8788211 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 5689032 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 3350962 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 1911299 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1185372 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 25155: 644733 -num_kmers_in_skew_index 33667316 (1.34377%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 12097707 - building MPHF with 8 threads and 5 partitions (avg. partition size = 3000000)... - built mphs[0] for 12097707 kmers; bits/key = 2.51293 - built positions[0] for 12097707 kmers; bits/key = 7.00003 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 8788211 - building MPHF with 8 threads and 3 partitions (avg. partition size = 3000000)... - built mphs[1] for 8788211 kmers; bits/key = 2.37002 - built positions[1] for 8788211 kmers; bits/key = 8.00004 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 5689032 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[2] for 5689032 kmers; bits/key = 2.37617 - built positions[2] for 5689032 kmers; bits/key = 9.00007 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 3350962 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[3] for 3350962 kmers; bits/key = 2.90674 - built positions[3] for 3350962 kmers; bits/key = 10.0001 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1911299 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1911299)... 
- built mphs[4] for 1911299 kmers; bits/key = 2.35739 - built positions[4] for 1911299 kmers; bits/key = 11.0002 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1185372 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1185372)... - built mphs[5] for 1185372 kmers; bits/key = 2.39578 - built positions[5] for 1185372 kmers; bits/key = 12.0003 - lower = 4096; upper = 25155; num_bits_per_pos = 15; num_kmers_in_partition = 644733 - building MPHF with 8 threads and 1 partitions (avg. partition size = 644733)... - built mphs[6] for 644733 kmers; bits/key = 2.40658 - built positions[6] for 644733 kmers; bits/key = 15.0006 -num_bits_for_skew_index 368007536(0.146883 [bits/kmer]) -=== step 4: 'build_skew_index' 17.1404 [sec] (6.84126 [ns/kmer]) -=== total_time 130.391 [sec] (52.0432 [ns/kmer]) -total index size: 2708856220 [B] -- 2708.86 [MB] -SPACE BREAKDOWN: - minimizers: 0.406625 [bits/kmer] (2.61704 [bits/key]) -- 4.70114% - pieces: 0.0585865 [bits/kmer] -- 0.67734% - sizes: 0.258258 [bits/kmer] -- 2.98582% - offsets: 5.46748 [bits/kmer] -- 63.2116% - strings: 2.31166 [bits/kmer] -- 26.726% - skew_index: 0.146883 [bits/kmer] -- 1.69817% - weights: 5.8752e-07 [bits/kmer] -- 6.79253e-06% - -------------- - total: 8.6495 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 97.1098% -buckets with 2 minimizer positions = 1.75083% -buckets with 3 minimizer positions = 0.44863% -buckets with 4 minimizer positions = 0.203142% -buckets with 5 minimizer positions = 0.116347% -buckets with 6 minimizer positions = 0.0749733% -buckets with 7 minimizer positions = 0.0518841% -buckets with 8 minimizer positions = 0.0381006% -buckets with 9 minimizer positions = 0.0288385% -buckets with 10 minimizer positions = 0.0224391% -buckets with 11 minimizer positions = 0.0179622% -buckets with 12 minimizer positions = 0.0146718% -buckets with 13 minimizer positions = 0.0122715% -buckets with 14 minimizer 
positions = 0.0103271% -buckets with 15 minimizer positions = 0.00873164% -buckets with 16 minimizer positions = 0.00747652% -max_bucket_size 25155 -2025-08-22 15:11:06: saving data structure to disk... -2025-08-22 15:11:07: DONE diff --git a/benchmarks/results-22-08-25/k31/22-08-25.regular.high-hit.streaming_query_log b/benchmarks/results-22-08-25/k31/22-08-25.regular.high-hit.streaming_query_log deleted file mode 100644 index 6eaf5d6..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.regular.high-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-22 15:20:47: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-22 15:21:01: DONE -==== query report: -num_kmers = 163287360 -num_positive_kmers = 132860997 (81.3664%) -num_negative_kmers = 30426363 (18.6336%) -num_invalid_kmers = 0 (0%) -num_searches = 7886675/132860997 (5.93603%) -num_extensions = 124974322/132860997 (94.064%) -elapsed = 13179 millisec / 13.179 sec / 0.21965 min / 80.7104 ns/kmer -2025-08-22 15:21:01: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-22 15:22:23: DONE -==== query report: -num_kmers = 695737535 -num_positive_kmers = 525542891 (75.5375%) -num_negative_kmers = 170183654 (24.4609%) -num_invalid_kmers = 10990 (0.00157962%) -num_searches = 13101335/525542891 (2.49291%) -num_extensions = 512441556/525542891 (97.5071%) -elapsed = 81847.8 millisec / 81.8478 sec / 1.36413 min / 117.642 ns/kmer -2025-08-22 15:22:24: performing queries from file '/home/giulio/sshash_queries/SRR5833294.fastq.gz'... 
-2025-08-22 15:27:26: DONE -==== query report: -num_kmers = 1569974986 -num_positive_kmers = 1437870528 (91.5856%) -num_negative_kmers = 131075447 (8.34889%) -num_invalid_kmers = 1029011 (0.0655431%) -num_searches = 110228289/1437870528 (7.66608%) -num_extensions = 1327642239/1437870528 (92.3339%) -elapsed = 301953 millisec / 301.953 sec / 5.03256 min / 192.33 ns/kmer diff --git a/benchmarks/results-22-08-25/k31/22-08-25.regular.low-hit.streaming_query_log b/benchmarks/results-22-08-25/k31/22-08-25.regular.low-hit.streaming_query_log deleted file mode 100644 index efc2d47..0000000 --- a/benchmarks/results-22-08-25/k31/22-08-25.regular.low-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-22 15:32:46: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-22 15:35:10: DONE -==== query report: -num_kmers = 695737535 -num_positive_kmers = 4754204 (0.683333%) -num_negative_kmers = 690972341 (99.3151%) -num_invalid_kmers = 10990 (0.00157962%) -num_searches = 3613872/4754204 (76.0142%) -num_extensions = 1140332/4754204 (23.9858%) -elapsed = 144520 millisec / 144.52 sec / 2.40867 min / 207.722 ns/kmer -2025-08-22 15:35:11: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-22 15:35:25: DONE -==== query report: -num_kmers = 163287360 -num_positive_kmers = 790414 (0.484063%) -num_negative_kmers = 162496946 (99.5159%) -num_invalid_kmers = 0 (0%) -num_searches = 499575/790414 (63.2042%) -num_extensions = 290839/790414 (36.7958%) -elapsed = 14211.5 millisec / 14.2115 sec / 0.236859 min / 87.0339 ns/kmer -2025-08-22 15:35:26: performing queries from file '/home/giulio/sshash_queries/SRR5901135_1.fastq.gz'... 
-2025-08-22 15:36:57: DONE -==== query report: -num_kmers = 395433242 -num_positive_kmers = 1134 (0.000286774%) -num_negative_kmers = 395408021 (99.9936%) -num_invalid_kmers = 24087 (0.00609129%) -num_searches = 895/1134 (78.9242%) -num_extensions = 239/1134 (21.0758%) -elapsed = 91073.5 millisec / 91.0735 sec / 1.51789 min / 230.313 ns/kmer diff --git a/benchmarks/results-22-08-25/k63/22-08-25.canon.bench_log b/benchmarks/results-22-08-25/k63/22-08-25.canon.bench_log deleted file mode 100644 index 405a5ed..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.canon.bench_log +++ /dev/null @@ -1,18 +0,0 @@ -avg_nanosec_per_positive_lookup 932.74 -avg_nanosec_per_negative_lookup 636.919 -avg_nanosec_per_positive_lookup_advanced 903.182 -avg_nanosec_per_negative_lookup_advanced 635.834 -avg_nanosec_per_access 382.206 -iterator: avg_nanosec_per_kmer 19.9772 -avg_nanosec_per_positive_lookup 854.129 -avg_nanosec_per_negative_lookup 715.394 -avg_nanosec_per_positive_lookup_advanced 823.575 -avg_nanosec_per_negative_lookup_advanced 712.864 -avg_nanosec_per_access 320.935 -iterator: avg_nanosec_per_kmer 19.9017 -avg_nanosec_per_positive_lookup 1168.7 -avg_nanosec_per_negative_lookup 793.31 -avg_nanosec_per_positive_lookup_advanced 1138.05 -avg_nanosec_per_negative_lookup_advanced 790.911 -avg_nanosec_per_access 498.022 -iterator: avg_nanosec_per_kmer 19.9255 diff --git a/benchmarks/results-22-08-25/k63/22-08-25.canon.build_log b/benchmarks/results-22-08-25/k63/22-08-25.canon.build_log deleted file mode 100644 index 8b06d7d..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.canon.build_log +++ /dev/null @@ -1,365 +0,0 @@ -k = 63, m = 23, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/cod.k63.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 63806937 bases, 57606999 kmers -read 200000 sequences, 129066588 bases, 116666650 kmers -read 300000 sequences, 192862556 bases, 174262618 kmers -read 400000 sequences, 256212661 bases, 231412723 kmers -read 500000 sequences, 319282064 bases, 288282126 kmers -read 600000 sequences, 379117165 bases, 341917227 kmers -read 700000 sequences, 436654305 bases, 393254367 kmers -read 800000 sequences, 492876125 bases, 443276187 kmers -read 900000 sequences, 547344740 bases, 491544802 kmers -read 1000000 sequences, 598624540 bases, 536624602 kmers -=== step 1.1: 'encoding_input' 3.06586 [sec] (5.50833 [ns/kmer]) -read 1049410 sequences, 621649078 bases, 556585658 kmers -num_kmers 556585658 -cost: 2.0 + 0.233795 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 2.30899 [sec] (4.1485 [ns/kmer]) -=== step 1: 'parse_file' 5.37497 [sec] (9.65704 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.bin' -num_minimizers = 28789847 -num_minimizer_positions = 33868028 -num_super_kmers = 35598946 -building minimizers MPHF with 8 threads and 10 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 4.23508 [sec] (7.60903 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.0.bin'... 
-=== step 2.1: 're-sorting minimizers tuples' 1.92984 [sec] (3.46728 [ns/kmer]) -bits_per_offset = ceil(log2(621649142)) = 30 -reading from 'tmp_dir/sshash.tmp.run_1755933730548601913.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933742116440678.bucket_pairs.0.bin'... -num_singletons 27569809/28789847 (95.7623%) -building: 0.081542 [sec] -computing minimizers offsets: 0.118891 [sec] -encoding: 0.008141 [sec] -=== step 3: 'build_sparse_index' 0.377037 [sec] (0.677411 [ns/kmer]) -max_bucket_size 308505 -log2_max_bucket_size 19 -num_buckets_in_skew_index 5779/28789847 (0.020073%) -num_partitions 7 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 5298924 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 3701291 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 2357254 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 1249465 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 1155039 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1137726 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 308505: 4187635 -num_kmers_in_skew_index 19087334 (3.42936%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 5298924 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[0] for 5298924 kmers; bits/key = 2.39231 - built positions[0] for 5298924 kmers; bits/key = 7.00006 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 3701291 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... 
- built mphs[1] for 3701291 kmers; bits/key = 2.74283 - built positions[1] for 3701291 kmers; bits/key = 8.0001 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 2357254 - building MPHF with 8 threads and 1 partitions (avg. partition size = 2357254)... - built mphs[2] for 2357254 kmers; bits/key = 2.34474 - built positions[2] for 2357254 kmers; bits/key = 9.00014 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1249465 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1249465)... - built mphs[3] for 1249465 kmers; bits/key = 2.39151 - built positions[3] for 1249465 kmers; bits/key = 10.0003 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 1155039 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1155039)... - built mphs[4] for 1155039 kmers; bits/key = 2.39906 - built positions[4] for 1155039 kmers; bits/key = 11.0003 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1137726 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1137726)... - built mphs[5] for 1137726 kmers; bits/key = 2.39532 - built positions[5] for 1137726 kmers; bits/key = 12.0003 - lower = 4096; upper = 308505; num_bits_per_pos = 19; num_kmers_in_partition = 4187635 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... 
- built mphs[6] for 4187635 kmers; bits/key = 2.62161 - built positions[6] for 4187635 kmers; bits/key = 19.0001 -num_bits_for_skew_index 254157024(0.456636 [bits/kmer]) -=== step 4: 'build_skew_index' 11.3275 [sec] (20.3517 [ns/kmer]) -=== total_time 23.2444 [sec] (41.7625 [ns/kmer]) -total index size: 331637748 [B] -- 331.638 [MB] -SPACE BREAKDOWN: - minimizers: 0.137431 [bits/kmer] (2.65691 [bits/key]) -- 2.88312% - pieces: 0.0233256 [bits/kmer] -- 0.48934% - sizes: 0.0900651 [bits/kmer] -- 1.88945% - offsets: 1.82549 [bits/kmer] -- 38.2963% - strings: 2.2338 [bits/kmer] -- 46.8621% - skew_index: 0.456636 [bits/kmer] -- 9.57962% - weights: 2.6447e-06 [bits/kmer] -- 5.54822e-05% - -------------- - total: 4.76675 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 95.7623% -buckets with 2 minimizer positions = 2.31119% -buckets with 3 minimizer positions = 0.765655% -buckets with 4 minimizer positions = 0.357056% -buckets with 5 minimizer positions = 0.200974% -buckets with 6 minimizer positions = 0.126135% -buckets with 7 minimizer positions = 0.0851446% -buckets with 8 minimizer positions = 0.0608339% -buckets with 9 minimizer positions = 0.0458495% -buckets with 10 minimizer positions = 0.035773% -buckets with 11 minimizer positions = 0.0286247% -buckets with 12 minimizer positions = 0.0232721% -buckets with 13 minimizer positions = 0.0190901% -buckets with 14 minimizer positions = 0.0160543% -buckets with 15 minimizer positions = 0.0139007% -buckets with 16 minimizer positions = 0.012098% -max_bucket_size 308505 -2025-08-23 09:22:33: saving data structure to disk... -2025-08-23 09:22:33: DONE -k = 63, m = 23, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/kestrel.k63.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 796964911 bases, 790764973 kmers -=== step 1.1: 'encoding_input' 5.82317 [sec] (5.04061 [ns/kmer]) -read 158680 sequences, 1165088827 bases, 1155250667 kmers -num_kmers 1155250667 -cost: 2.0 + 0.0170321 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 4.48244 [sec] (3.88006 [ns/kmer]) -=== step 1: 'parse_file' 10.3057 [sec] (8.92079 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.bin' -num_super_kmers = 50000000 -num_minimizers = 67576153 -num_minimizer_positions = 68454274 -num_super_kmers = 72275777 -building minimizers MPHF with 8 threads and 23 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 7.9737 [sec] (6.90214 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 3.67553 [sec] (3.18159 [ns/kmer]) -bits_per_offset = ceil(log2(1165088891)) = 31 -reading from 'tmp_dir/sshash.tmp.run_1755933753979807462.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933775988195505.bucket_pairs.0.bin'... 
-num_singletons 67252894/67576153 (99.5216%) -building: 0.172239 [sec] -computing minimizers offsets: 0.149836 [sec] -encoding: 0.00163 [sec] -=== step 3: 'build_sparse_index' 0.595998 [sec] (0.515904 [ns/kmer]) -max_bucket_size 1910 -log2_max_bucket_size 11 -num_buckets_in_skew_index 948/67576153 (0.00140286%) -num_partitions 5 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 988498 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 672272 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 348636 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 197046 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 1910: 181617 -num_kmers_in_skew_index 2388069 (0.206714%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 988498 - building MPHF with 8 threads and 1 partitions (avg. partition size = 988498)... - built mphs[0] for 988498 kmers; bits/key = 2.36143 - built positions[0] for 988498 kmers; bits/key = 7.00033 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 672272 - building MPHF with 8 threads and 1 partitions (avg. partition size = 672272)... - built mphs[1] for 672272 kmers; bits/key = 2.41267 - built positions[1] for 672272 kmers; bits/key = 8.00048 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 348636 - building MPHF with 8 threads and 1 partitions (avg. partition size = 348636)... - built mphs[2] for 348636 kmers; bits/key = 2.41728 - built positions[2] for 348636 kmers; bits/key = 9.00093 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 197046 - building MPHF with 8 threads and 1 partitions (avg. partition size = 197046)... 
- built mphs[3] for 197046 kmers; bits/key = 2.53553 - built positions[3] for 197046 kmers; bits/key = 10.0018 - lower = 1024; upper = 1910; num_bits_per_pos = 11; num_kmers_in_partition = 181617 - building MPHF with 8 threads and 1 partitions (avg. partition size = 181617)... - built mphs[4] for 181617 kmers; bits/key = 2.49237 - built positions[4] for 181617 kmers; bits/key = 11.002 -num_bits_for_skew_index 25156768(0.021776 [bits/kmer]) -=== step 4: 'build_skew_index' 3.32103 [sec] (2.87473 [ns/kmer]) -=== total_time 25.872 [sec] (22.3951 [ns/kmer]) -total index size: 595424366 [B] -- 595.424 [MB] -SPACE BREAKDOWN: - minimizers: 0.153137 [bits/kmer] (2.61795 [bits/key]) -- 3.71397% - pieces: 0.00224854 [bits/kmer] -- 0.0545332% - sizes: 0.0921592 [bits/kmer] -- 2.23511% - offsets: 1.8369 [bits/kmer] -- 44.5498% - strings: 2.01703 [bits/kmer] -- 48.9184% - skew_index: 0.021776 [bits/kmer] -- 0.528127% - weights: 1.27418e-06 [bits/kmer] -- 3.09023e-05% - -------------- - total: 4.12326 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 99.5216% -buckets with 2 minimizer positions = 0.324159% -buckets with 3 minimizer positions = 0.0663932% -buckets with 4 minimizer positions = 0.0279063% -buckets with 5 minimizer positions = 0.0153235% -buckets with 6 minimizer positions = 0.00951519% -buckets with 7 minimizer positions = 0.00640907% -buckets with 8 minimizer positions = 0.0046984% -buckets with 9 minimizer positions = 0.0034302% -buckets with 10 minimizer positions = 0.00267254% -buckets with 11 minimizer positions = 0.00206138% -buckets with 12 minimizer positions = 0.00169438% -buckets with 13 minimizer positions = 0.00130075% -buckets with 14 minimizer positions = 0.00118385% -buckets with 15 minimizer positions = 0.00103439% -buckets with 16 minimizer positions = 0.00086273% -max_bucket_size 1910 -2025-08-23 09:22:59: saving data structure to disk... 
-2025-08-23 09:23:00: DONE -k = 63, m = 24, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = true, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/human.k63.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 105696573 bases, 99496635 kmers -read 200000 sequences, 214697764 bases, 202297826 kmers -read 300000 sequences, 314555962 bases, 295956024 kmers -read 400000 sequences, 417096019 bases, 392296081 kmers -read 500000 sequences, 531577423 bases, 500577485 kmers -read 600000 sequences, 641881766 bases, 604681828 kmers -read 700000 sequences, 753533625 bases, 710133687 kmers -read 800000 sequences, 860572728 bases, 810972790 kmers -read 900000 sequences, 966201746 bases, 910401808 kmers -read 1000000 sequences, 1074799974 bases, 1012800036 kmers -read 1100000 sequences, 1177786303 bases, 1109586365 kmers -read 1200000 sequences, 1284445481 bases, 1210045543 kmers -read 1300000 sequences, 1387959208 bases, 1307359270 kmers -read 1400000 sequences, 1496344159 bases, 1409544221 kmers -read 1500000 sequences, 1601642232 bases, 1508642294 kmers -read 1600000 sequences, 1704579905 bases, 1605379967 kmers -read 1700000 sequences, 1809734567 bases, 1704334629 kmers -read 1800000 sequences, 1913668987 bases, 1802069049 kmers -read 1900000 sequences, 2015410398 bases, 1897610460 kmers -read 2000000 sequences, 2111050791 bases, 1987050853 kmers -read 2100000 sequences, 2208479896 bases, 2078279958 kmers -read 2200000 sequences, 2297838310 bases, 2161438372 kmers -read 2300000 sequences, 2384042166 bases, 2241442228 kmers -read 2400000 sequences, 2476010359 bases, 2327210421 kmers -read 2500000 sequences, 2554605267 bases, 2399605329 kmers -read 2600000 sequences, 2632827152 bases, 2471627214 kmers -read 2700000 sequences, 2705855836 bases, 2538455898 kmers -read 2800000 sequences, 2777866668 bases, 2604266730 kmers -read 2900000 sequences, 2846376916 bases, 2666576978 kmers -read 3000000 sequences, 2913930048 bases, 
2727930110 kmers -=== step 1.1: 'encoding_input' 14.791 [sec] (5.33817 [ns/kmer]) -read 3079563 sequences, 2961741299 bases, 2770808393 kmers -num_kmers 2770808393 -cost: 2.0 + 0.137817 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 10.4363 [sec] (3.76652 [ns/kmer]) -=== step 1: 'parse_file' 25.2275 [sec] (9.10473 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_super_kmers = 150000000 -num_minimizers = 144189268 -num_minimizer_positions = 168680035 -num_super_kmers = 177508359 -building minimizers MPHF with 8 threads and 49 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 18.4473 [sec] (6.65772 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 9.18088 [sec] (3.31343 [ns/kmer]) -bits_per_offset = ceil(log2(2961741363)) = 32 -reading from 'tmp_dir/sshash.tmp.run_1755933780186790770.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933833177503584.bucket_pairs.0.bin'... 
-num_singletons 140024257/144189268 (97.1114%) -building: 0.400078 [sec] -computing minimizers offsets: 0.370622 [sec] -encoding: 0.029527 [sec] -=== step 3: 'build_sparse_index' 1.56987 [sec] (0.566573 [ns/kmer]) -max_bucket_size 79613 -log2_max_bucket_size 17 -num_buckets_in_skew_index 38094/144189268 (0.0264194%) -num_partitions 7 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 28443207 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 26786499 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 23905222 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 20445613 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 19605913 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 17586499 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 79613: 41784999 -num_kmers_in_skew_index 178557952 (6.44425%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 28443207 - building MPHF with 8 threads and 10 partitions (avg. partition size = 3000000)... - built mphs[0] for 28443207 kmers; bits/key = 2.39412 - built positions[0] for 28443207 kmers; bits/key = 7.00001 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 26786499 - building MPHF with 8 threads and 9 partitions (avg. partition size = 3000000)... - built mphs[1] for 26786499 kmers; bits/key = 2.34704 - built positions[1] for 26786499 kmers; bits/key = 8.00001 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 23905222 - building MPHF with 8 threads and 8 partitions (avg. partition size = 3000000)... 
- built mphs[2] for 23905222 kmers; bits/key = 2.33992 - built positions[2] for 23905222 kmers; bits/key = 9.00001 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 20445613 - building MPHF with 8 threads and 7 partitions (avg. partition size = 3000000)... - built mphs[3] for 20445613 kmers; bits/key = 2.37543 - built positions[3] for 20445613 kmers; bits/key = 10 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 19605913 - building MPHF with 8 threads and 7 partitions (avg. partition size = 3000000)... - built mphs[4] for 19605913 kmers; bits/key = 2.40323 - built positions[4] for 19605913 kmers; bits/key = 11 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 17586499 - building MPHF with 8 threads and 6 partitions (avg. partition size = 3000000)... - built mphs[5] for 17586499 kmers; bits/key = 2.37019 - built positions[5] for 17586499 kmers; bits/key = 12 - lower = 4096; upper = 79613; num_bits_per_pos = 17; num_kmers_in_partition = 41784999 - building MPHF with 8 threads and 14 partitions (avg. partition size = 3000000)... 
- built mphs[6] for 41784999 kmers; bits/key = 2.34184 - built positions[6] for 41784999 kmers; bits/key = 17 -num_bits_for_skew_index 2392171696(0.863348 [bits/kmer]) -=== step 4: 'build_skew_index' 56.036 [sec] (20.2237 [ns/kmer]) -=== total_time 110.461 [sec] (39.8661 [ns/kmer]) -total index size: 1797885300 [B] -- 1797.89 [MB] -SPACE BREAKDOWN: - minimizers: 0.136439 [bits/kmer] (2.62188 [bits/key]) -- 2.62842% - pieces: 0.015002 [bits/kmer] -- 0.289004% - sizes: 0.0902443 [bits/kmer] -- 1.7385% - offsets: 1.94808 [bits/kmer] -- 37.5285% - strings: 2.13782 [bits/kmer] -- 41.1837% - skew_index: 0.863348 [bits/kmer] -- 16.6318% - weights: 5.31253e-07 [bits/kmer] -- 1.02342e-05% - -------------- - total: 5.19093 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 97.1114% -buckets with 2 minimizer positions = 1.66722% -buckets with 3 minimizer positions = 0.46733% -buckets with 4 minimizer positions = 0.21225% -buckets with 5 minimizer positions = 0.119966% -buckets with 6 minimizer positions = 0.0769385% -buckets with 7 minimizer positions = 0.0535934% -buckets with 8 minimizer positions = 0.0393088% -buckets with 9 minimizer positions = 0.0300556% -buckets with 10 minimizer positions = 0.024146% -buckets with 11 minimizer positions = 0.0194904% -buckets with 12 minimizer positions = 0.0157328% -buckets with 13 minimizer positions = 0.0133685% -buckets with 14 minimizer positions = 0.011428% -buckets with 15 minimizer positions = 0.00992723% -buckets with 16 minimizer positions = 0.00851728% -max_bucket_size 79613 -2025-08-23 09:24:50: saving data structure to disk... 
-2025-08-23 09:24:51: DONE diff --git a/benchmarks/results-22-08-25/k63/22-08-25.canon.high-hit.streaming_query_log b/benchmarks/results-22-08-25/k63/22-08-25.canon.high-hit.streaming_query_log deleted file mode 100644 index b2718c2..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.canon.high-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-23 09:34:27: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-23 09:34:42: DONE -==== query report: -num_kmers = 97972416 -num_positive_kmers = 67275966 (68.6683%) -num_negative_kmers = 30696450 (31.3317%) -num_invalid_kmers = 0 (0%) -num_searches = 34555279/67275966 (51.3635%) -num_extensions = 32720687/67275966 (48.6365%) -elapsed = 15679.6 millisec / 15.6796 sec / 0.261326 min / 160.041 ns/kmer -2025-08-23 09:34:43: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-23 09:35:47: DONE -==== query report: -num_kmers = 461383839 -num_positive_kmers = 293470517 (63.6066%) -num_negative_kmers = 167902332 (36.391%) -num_invalid_kmers = 10990 (0.00238196%) -num_searches = 149842881/293470517 (51.0589%) -num_extensions = 143627636/293470517 (48.9411%) -elapsed = 64725 millisec / 64.725 sec / 1.07875 min / 140.284 ns/kmer -2025-08-23 09:35:48: performing queries from file '/home/giulio/sshash_queries/SRR5833294.fastq.gz'... 
-2025-08-23 09:38:00: DONE -==== query report: -num_kmers = 477818474 -num_positive_kmers = 406484348 (85.0709%) -num_negative_kmers = 70660348 (14.7881%) -num_invalid_kmers = 673778 (0.141011%) -num_searches = 220326145/406484348 (54.2029%) -num_extensions = 186158203/406484348 (45.7971%) -elapsed = 131771 millisec / 131.771 sec / 2.19619 min / 275.777 ns/kmer diff --git a/benchmarks/results-22-08-25/k63/22-08-25.canon.low-hit.streaming_query_log b/benchmarks/results-22-08-25/k63/22-08-25.canon.low-hit.streaming_query_log deleted file mode 100644 index ea51d30..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.canon.low-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-23 09:39:25: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-23 09:40:04: DONE -==== query report: -num_kmers = 461383839 -num_positive_kmers = 756097 (0.163876%) -num_negative_kmers = 460616752 (99.8337%) -num_invalid_kmers = 10990 (0.00238196%) -num_searches = 631375/756097 (83.5045%) -num_extensions = 124722/756097 (16.4955%) -elapsed = 38764.7 millisec / 38.7647 sec / 0.646078 min / 84.0183 ns/kmer -2025-08-23 09:40:04: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-23 09:40:09: DONE -==== query report: -num_kmers = 97972416 -num_positive_kmers = 41066 (0.0419159%) -num_negative_kmers = 97931350 (99.9581%) -num_invalid_kmers = 0 (0%) -num_searches = 39909/41066 (97.1826%) -num_extensions = 1157/41066 (2.81742%) -elapsed = 5264.96 millisec / 5.26496 sec / 0.0877494 min / 53.7392 ns/kmer -2025-08-23 09:40:10: performing queries from file '/home/giulio/sshash_queries/SRR5901135_1.fastq.gz'... 
-2025-08-23 09:40:33: DONE -==== query report: -num_kmers = 322085785 -num_positive_kmers = 8 (2.48381e-06%) -num_negative_kmers = 322073557 (99.9962%) -num_invalid_kmers = 12220 (0.00379402%) -num_searches = 7/8 (87.5%) -num_extensions = 1/8 (12.5%) -elapsed = 23143.3 millisec / 23.1433 sec / 0.385721 min / 71.8544 ns/kmer diff --git a/benchmarks/results-22-08-25/k63/22-08-25.regular.bench_log b/benchmarks/results-22-08-25/k63/22-08-25.regular.bench_log deleted file mode 100644 index 03359dd..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.regular.bench_log +++ /dev/null @@ -1,18 +0,0 @@ -avg_nanosec_per_positive_lookup 1034.49 -avg_nanosec_per_negative_lookup 896.958 -avg_nanosec_per_positive_lookup_advanced 1014.59 -avg_nanosec_per_negative_lookup_advanced 890.945 -avg_nanosec_per_access 377.176 -iterator: avg_nanosec_per_kmer 19.9634 -avg_nanosec_per_positive_lookup 972.898 -avg_nanosec_per_negative_lookup 1060.07 -avg_nanosec_per_positive_lookup_advanced 960.465 -avg_nanosec_per_negative_lookup_advanced 1061.94 -avg_nanosec_per_access 320.355 -iterator: avg_nanosec_per_kmer 19.891 -avg_nanosec_per_positive_lookup 1409.64 -avg_nanosec_per_negative_lookup 1217.53 -avg_nanosec_per_positive_lookup_advanced 1396.28 -avg_nanosec_per_negative_lookup_advanced 1220.85 -avg_nanosec_per_access 496.434 -iterator: avg_nanosec_per_kmer 19.9095 diff --git a/benchmarks/results-22-08-25/k63/22-08-25.regular.build_log b/benchmarks/results-22-08-25/k63/22-08-25.regular.build_log deleted file mode 100644 index ac3c70d..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.regular.build_log +++ /dev/null @@ -1,369 +0,0 @@ -k = 63, m = 24, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/cod.k63.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 63806937 bases, 57606999 kmers -read 200000 sequences, 129066588 bases, 116666650 kmers -read 300000 sequences, 192862556 bases, 174262618 kmers -read 400000 sequences, 256212661 bases, 231412723 kmers -read 500000 sequences, 319282064 bases, 288282126 kmers -read 600000 sequences, 379117165 bases, 341917227 kmers -read 700000 sequences, 436654305 bases, 393254367 kmers -read 800000 sequences, 492876125 bases, 443276187 kmers -read 900000 sequences, 547344740 bases, 491544802 kmers -read 1000000 sequences, 598624540 bases, 536624602 kmers -=== step 1.1: 'encoding_input' 3.10708 [sec] (5.58238 [ns/kmer]) -read 1049410 sequences, 621649078 bases, 556585658 kmers -num_kmers 556585658 -cost: 2.0 + 0.233795 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 1.56406 [sec] (2.81009 [ns/kmer]) -=== step 1: 'parse_file' 4.67165 [sec] (8.39341 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.bin' -num_minimizers = 24248598 -num_minimizer_positions = 27934907 -num_super_kmers = 27934907 -building minimizers MPHF with 8 threads and 9 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 3.46371 [sec] (6.22314 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.0.bin'... 
-=== step 2.1: 're-sorting minimizers tuples' 1.76154 [sec] (3.16489 [ns/kmer]) -bits_per_offset = ceil(log2(621649142)) = 30 -reading from 'tmp_dir/sshash.tmp.run_1755933599801487843.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933609722074208.bucket_pairs.0.bin'... -num_singletons 23245919/24248598 (95.865%) -building: 0.069716 [sec] -computing minimizers offsets: 0.113133 [sec] -encoding: 0.008816 [sec] -=== step 3: 'build_sparse_index' 0.316554 [sec] (0.568743 [ns/kmer]) -max_bucket_size 125977 -log2_max_bucket_size 17 -num_buckets_in_skew_index 3708/24248598 (0.0152916%) -num_partitions 7 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 3934591 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 2384651 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 1364707 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 1027535 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 960390 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1043360 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 125977: 2191893 -num_kmers_in_skew_index 12907127 (2.31898%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 3934591 - building MPHF with 8 threads and 2 partitions (avg. partition size = 3000000)... - built mphs[0] for 3934591 kmers; bits/key = 2.76002 - built positions[0] for 3934591 kmers; bits/key = 7.00008 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 2384651 - building MPHF with 8 threads and 1 partitions (avg. partition size = 2384651)... 
- built mphs[1] for 2384651 kmers; bits/key = 2.34627 - built positions[1] for 2384651 kmers; bits/key = 8.00015 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 1364707 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1364707)... - built mphs[2] for 1364707 kmers; bits/key = 2.3775 - built positions[2] for 1364707 kmers; bits/key = 9.00024 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 1027535 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1027535)... - built mphs[3] for 1027535 kmers; bits/key = 2.35714 - built positions[3] for 1027535 kmers; bits/key = 10.0004 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 960390 - building MPHF with 8 threads and 1 partitions (avg. partition size = 960390)... - built mphs[4] for 960390 kmers; bits/key = 2.36344 - built positions[4] for 960390 kmers; bits/key = 11.0004 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 1043360 - building MPHF with 8 threads and 1 partitions (avg. partition size = 1043360)... - built mphs[5] for 1043360 kmers; bits/key = 2.3558 - built positions[5] for 1043360 kmers; bits/key = 12.0003 - lower = 4096; upper = 125977; num_bits_per_pos = 17; num_kmers_in_partition = 2191893 - building MPHF with 8 threads and 1 partitions (avg. partition size = 2191893)... 
- built mphs[6] for 2191893 kmers; bits/key = 2.349 - built positions[6] for 2191893 kmers; bits/key = 17.0002 -num_bits_for_skew_index 161524192(0.290205 [bits/kmer]) -=== step 4: 'build_skew_index' 11.9882 [sec] (21.5388 [ns/kmer]) -=== total_time 22.2016 [sec] (39.889 [ns/kmer]) -total index size: 295698804 [B] -- 295.699 [MB] -SPACE BREAKDOWN: - minimizers: 0.122343 [bits/kmer] (2.80819 [bits/key]) -- 2.87855% - pieces: 0.0233256 [bits/kmer] -- 0.548814% - sizes: 0.0748157 [bits/kmer] -- 1.76029% - offsets: 1.50569 [bits/kmer] -- 35.4266% - strings: 2.2338 [bits/kmer] -- 52.5576% - skew_index: 0.290205 [bits/kmer] -- 6.82807% - weights: 2.6447e-06 [bits/kmer] -- 6.22255e-05% - -------------- - total: 4.25018 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 95.865% -buckets with 2 minimizer positions = 2.37909% -buckets with 3 minimizer positions = 0.729951% -buckets with 4 minimizer positions = 0.320592% -buckets with 5 minimizer positions = 0.176509% -buckets with 6 minimizer positions = 0.110357% -buckets with 7 minimizer positions = 0.0750971% -buckets with 8 minimizer positions = 0.0535371% -buckets with 9 minimizer positions = 0.0403364% -buckets with 10 minimizer positions = 0.0317585% -buckets with 11 minimizer positions = 0.0251602% -buckets with 12 minimizer positions = 0.0203888% -buckets with 13 minimizer positions = 0.0167927% -buckets with 14 minimizer positions = 0.0142235% -buckets with 15 minimizer positions = 0.0120502% -buckets with 16 minimizer positions = 0.0107346% -max_bucket_size 125977 -2025-08-23 09:20:22: saving data structure to disk... -2025-08-23 09:20:22: DONE -k = 63, m = 24, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/kestrel.k63.unitigs.fa.ust.fa.gz'... 
-read 100000 sequences, 796964911 bases, 790764973 kmers -=== step 1.1: 'encoding_input' 5.86916 [sec] (5.08042 [ns/kmer]) -read 158680 sequences, 1165088827 bases, 1155250667 kmers -num_kmers 1155250667 -cost: 2.0 + 0.0170321 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 3.06841 [sec] (2.65605 [ns/kmer]) -=== step 1: 'parse_file' 8.93806 [sec] (7.73691 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.bin' -num_super_kmers = 50000000 -num_minimizers = 55336139 -num_minimizer_positions = 55891145 -num_super_kmers = 55891145 -building minimizers MPHF with 8 threads and 19 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 6.75738 [sec] (5.84928 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 2.92471 [sec] (2.53167 [ns/kmer]) -bits_per_offset = ceil(log2(1165088891)) = 31 -reading from 'tmp_dir/sshash.tmp.run_1755933622178164247.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933640842386460.bucket_pairs.0.bin'... 
-num_singletons 55120943/55336139 (99.6111%) -building: 0.140172 [sec] -computing minimizers offsets: 0.126715 [sec] -encoding: 0.001673 [sec] -=== step 3: 'build_sparse_index' 0.451677 [sec] (0.390977 [ns/kmer]) -max_bucket_size 2412 -log2_max_bucket_size 12 -num_buckets_in_skew_index 483/55336139 (0.000872847%) -num_partitions 6 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 558629 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 454245 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 172997 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 117563 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 24468 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 2412: 34188 -num_kmers_in_skew_index 1362090 (0.117904%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 558629 - building MPHF with 8 threads and 1 partitions (avg. partition size = 558629)... - built mphs[0] for 558629 kmers; bits/key = 2.42488 - built positions[0] for 558629 kmers; bits/key = 7.00068 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 454245 - building MPHF with 8 threads and 1 partitions (avg. partition size = 454245)... - built mphs[1] for 454245 kmers; bits/key = 2.35556 - built positions[1] for 454245 kmers; bits/key = 8.00076 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 172997 - building MPHF with 8 threads and 1 partitions (avg. partition size = 172997)... - built mphs[2] for 172997 kmers; bits/key = 2.50881 - built positions[2] for 172997 kmers; bits/key = 9.00196 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 117563 - building MPHF with 8 threads and 1 partitions (avg. partition size = 117563)... 
- built mphs[3] for 117563 kmers; bits/key = 2.6113 - built positions[3] for 117563 kmers; bits/key = 10.0031 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 24468 - building MPHF with 8 threads and 1 partitions (avg. partition size = 24468)... - built mphs[4] for 24468 kmers; bits/key = 3.11329 - built positions[4] for 24468 kmers; bits/key = 11.0145 - lower = 2048; upper = 2412; num_bits_per_pos = 12; num_kmers_in_partition = 34188 - building MPHF with 8 threads and 1 partitions (avg. partition size = 34188)... - built mphs[5] for 34188 kmers; bits/key = 2.92079 - built positions[5] for 34188 kmers; bits/key = 12.0108 -num_bits_for_skew_index 14300368(0.0123786 [bits/kmer]) -=== step 4: 'build_skew_index' 2.03785 [sec] (1.76399 [ns/kmer]) -=== total_time 21.1097 [sec] (18.2728 [ns/kmer]) -total index size: 539043674 [B] -- 539.044 [MB] -SPACE BREAKDOWN: - minimizers: 0.126057 [bits/kmer] (2.63168 [bits/key]) -- 3.37698% - pieces: 0.00224854 [bits/kmer] -- 0.060237% - sizes: 0.0753247 [bits/kmer] -- 2.0179% - offsets: 1.49978 [bits/kmer] -- 40.1782% - strings: 2.01703 [bits/kmer] -- 54.035% - skew_index: 0.0123786 [bits/kmer] -- 0.331614% - weights: 1.27418e-06 [bits/kmer] -- 3.41345e-05% - -------------- - total: 3.73283 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 99.6111% -buckets with 2 minimizer positions = 0.265087% -buckets with 3 minimizer positions = 0.0543786% -buckets with 4 minimizer positions = 0.0223181% -buckets with 5 minimizer positions = 0.0120283% -buckets with 6 minimizer positions = 0.00758456% -buckets with 7 minimizer positions = 0.00500216% -buckets with 8 minimizer positions = 0.00356367% -buckets with 9 minimizer positions = 0.0027848% -buckets with 10 minimizer positions = 0.00213423% -buckets with 11 minimizer positions = 0.00165895% -buckets with 12 minimizer positions = 0.00135897% -buckets with 13 minimizer positions = 0.00110778% -buckets with 14 minimizer 
positions = 0.0010373% -buckets with 15 minimizer positions = 0.000778876% -buckets with 16 minimizer positions = 0.000674062% -max_bucket_size 2412 -2025-08-23 09:20:43: saving data structure to disk... -2025-08-23 09:20:43: DONE -k = 63, m = 25, seed = 1, num_threads = 8, ram_limit_in_GiB = 16, l = 6, lambda = 7, canonical = false, weighted = false, verbose = true -reading file '/home/giulio/sshash_datasets/human.k63.unitigs.fa.ust.fa.gz'... -read 100000 sequences, 105696573 bases, 99496635 kmers -read 200000 sequences, 214697764 bases, 202297826 kmers -read 300000 sequences, 314555962 bases, 295956024 kmers -read 400000 sequences, 417096019 bases, 392296081 kmers -read 500000 sequences, 531577423 bases, 500577485 kmers -read 600000 sequences, 641881766 bases, 604681828 kmers -read 700000 sequences, 753533625 bases, 710133687 kmers -read 800000 sequences, 860572728 bases, 810972790 kmers -read 900000 sequences, 966201746 bases, 910401808 kmers -read 1000000 sequences, 1074799974 bases, 1012800036 kmers -read 1100000 sequences, 1177786303 bases, 1109586365 kmers -read 1200000 sequences, 1284445481 bases, 1210045543 kmers -read 1300000 sequences, 1387959208 bases, 1307359270 kmers -read 1400000 sequences, 1496344159 bases, 1409544221 kmers -read 1500000 sequences, 1601642232 bases, 1508642294 kmers -read 1600000 sequences, 1704579905 bases, 1605379967 kmers -read 1700000 sequences, 1809734567 bases, 1704334629 kmers -read 1800000 sequences, 1913668987 bases, 1802069049 kmers -read 1900000 sequences, 2015410398 bases, 1897610460 kmers -read 2000000 sequences, 2111050791 bases, 1987050853 kmers -read 2100000 sequences, 2208479896 bases, 2078279958 kmers -read 2200000 sequences, 2297838310 bases, 2161438372 kmers -read 2300000 sequences, 2384042166 bases, 2241442228 kmers -read 2400000 sequences, 2476010359 bases, 2327210421 kmers -read 2500000 sequences, 2554605267 bases, 2399605329 kmers -read 2600000 sequences, 2632827152 bases, 2471627214 kmers -read 2700000 
sequences, 2705855836 bases, 2538455898 kmers -read 2800000 sequences, 2777866668 bases, 2604266730 kmers -read 2900000 sequences, 2846376916 bases, 2666576978 kmers -read 3000000 sequences, 2913930048 bases, 2727930110 kmers -=== step 1.1: 'encoding_input' 14.9665 [sec] (5.40151 [ns/kmer]) -read 3079563 sequences, 2961741299 bases, 2770808393 kmers -num_kmers 2770808393 -cost: 2.0 + 0.137817 [bits/kmer] -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.0.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.1.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.2.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.3.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.4.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.5.bin'... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.6.bin'... -=== step 1.2: 'computing_minimizers_tuples' 6.68663 [sec] (2.41324 [ns/kmer]) -=== step 1: 'parse_file' 21.6571 [sec] (7.81617 [ns/kmer]) - == files to merge = 7 -saving tuples to 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.bin' -num_super_kmers = 50000000 -num_super_kmers = 100000000 -num_minimizers = 123313717 -num_minimizer_positions = 141776281 -num_super_kmers = 141776281 -building minimizers MPHF with 8 threads and 42 partitions (avg. partition size = 3000000)... -=== step 2: 'build_minimizers' 15.2699 [sec] (5.51099 [ns/kmer]) -re-sorting minimizer tuples... -saving to file 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.0.bin'... -=== step 2.1: 're-sorting minimizers tuples' 7.31977 [sec] (2.64174 [ns/kmer]) -bits_per_offset = ceil(log2(2961741363)) = 32 -reading from 'tmp_dir/sshash.tmp.run_1755933643610613567.minimizers.bin'... -sorting buffer... -saving to file 'tmp_dir/sshash.tmp.run_1755933687969368724.bucket_pairs.0.bin'... 
-num_singletons 119993445/123313717 (97.3075%) -building: 0.337069 [sec] -computing minimizers offsets: 0.265635 [sec] -encoding: 0.030089 [sec] -=== step 3: 'build_sparse_index' 1.19223 [sec] (0.430283 [ns/kmer]) -max_bucket_size 57745 -log2_max_bucket_size 16 -num_buckets_in_skew_index 29393/123313717 (0.023836%) -num_partitions 7 -computing sizes of partitions... - partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 25252953 - partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 22813094 - partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 20384491 - partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 17949961 - partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 17766307 - partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 14306293 - partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 57745: 23435747 -num_kmers_in_skew_index 141908846 (5.12157%) -building partitions... - lower = 64; upper = 128; num_bits_per_pos = 7; num_kmers_in_partition = 25252953 - building MPHF with 8 threads and 9 partitions (avg. partition size = 3000000)... - built mphs[0] for 25252953 kmers; bits/key = 2.40003 - built positions[0] for 25252953 kmers; bits/key = 7.00001 - lower = 128; upper = 256; num_bits_per_pos = 8; num_kmers_in_partition = 22813094 - building MPHF with 8 threads and 8 partitions (avg. partition size = 3000000)... - built mphs[1] for 22813094 kmers; bits/key = 2.4057 - built positions[1] for 22813094 kmers; bits/key = 8.00001 - lower = 256; upper = 512; num_bits_per_pos = 9; num_kmers_in_partition = 20384491 - building MPHF with 8 threads and 7 partitions (avg. partition size = 3000000)... 
- built mphs[2] for 20384491 kmers; bits/key = 2.38124 - built positions[2] for 20384491 kmers; bits/key = 9.00002 - lower = 512; upper = 1024; num_bits_per_pos = 10; num_kmers_in_partition = 17949961 - building MPHF with 8 threads and 6 partitions (avg. partition size = 3000000)... - built mphs[3] for 17949961 kmers; bits/key = 2.33793 - built positions[3] for 17949961 kmers; bits/key = 10 - lower = 1024; upper = 2048; num_bits_per_pos = 11; num_kmers_in_partition = 17766307 - building MPHF with 8 threads and 6 partitions (avg. partition size = 3000000)... - built mphs[4] for 17766307 kmers; bits/key = 2.35372 - built positions[4] for 17766307 kmers; bits/key = 11 - lower = 2048; upper = 4096; num_bits_per_pos = 12; num_kmers_in_partition = 14306293 - building MPHF with 8 threads and 5 partitions (avg. partition size = 3000000)... - built mphs[5] for 14306293 kmers; bits/key = 2.4122 - built positions[5] for 14306293 kmers; bits/key = 12 - lower = 4096; upper = 57745; num_bits_per_pos = 16; num_kmers_in_partition = 23435747 - building MPHF with 8 threads and 8 partitions (avg. partition size = 3000000)... 
- built mphs[6] for 23435747 kmers; bits/key = 2.37151 - built positions[6] for 23435747 kmers; bits/key = 16 -num_bits_for_skew_index 1802215056(0.650429 [bits/kmer]) -=== step 4: 'build_skew_index' 40.6604 [sec] (14.6746 [ns/kmer]) -=== total_time 86.0994 [sec] (31.0737 [ns/kmer]) -total index size: 1604769320 [B] -- 1604.77 [MB] -SPACE BREAKDOWN: - minimizers: 0.116538 [bits/kmer] (2.61856 [bits/key]) -- 2.5152% - pieces: 0.015002 [bits/kmer] -- 0.323782% - sizes: 0.0762021 [bits/kmer] -- 1.64464% - offsets: 1.63737 [bits/kmer] -- 35.3387% - strings: 2.13782 [bits/kmer] -- 46.1397% - skew_index: 0.650429 [bits/kmer] -- 14.038% - weights: 5.31253e-07 [bits/kmer] -- 1.14658e-05% - -------------- - total: 4.63336 [bits/kmer] - === bucket statistics (less) === -buckets with 1 minimizer positions = 97.3075% -buckets with 2 minimizer positions = 1.58914% -buckets with 3 minimizer positions = 0.42399% -buckets with 4 minimizer positions = 0.18754% -buckets with 5 minimizer positions = 0.106614% -buckets with 6 minimizer positions = 0.0690572% -buckets with 7 minimizer positions = 0.0483636% -buckets with 8 minimizer positions = 0.0358646% -buckets with 9 minimizer positions = 0.0274284% -buckets with 10 minimizer positions = 0.0217713% -buckets with 11 minimizer positions = 0.0177231% -buckets with 12 minimizer positions = 0.014541% -buckets with 13 minimizer positions = 0.0122655% -buckets with 14 minimizer positions = 0.010633% -buckets with 15 minimizer positions = 0.00920011% -buckets with 16 minimizer positions = 0.00797721% -max_bucket_size 57745 -2025-08-23 09:22:09: saving data structure to disk... 
-2025-08-23 09:22:10: DONE diff --git a/benchmarks/results-22-08-25/k63/22-08-25.regular.high-hit.streaming_query_log b/benchmarks/results-22-08-25/k63/22-08-25.regular.high-hit.streaming_query_log deleted file mode 100644 index 7f05e44..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.regular.high-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-23 09:30:11: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-23 09:30:29: DONE -==== query report: -num_kmers = 97972416 -num_positive_kmers = 67275966 (68.6683%) -num_negative_kmers = 30696450 (31.3317%) -num_invalid_kmers = 0 (0%) -num_searches = 34555279/67275966 (51.3635%) -num_extensions = 32720687/67275966 (48.6365%) -elapsed = 17741.3 millisec / 17.7413 sec / 0.295688 min / 181.084 ns/kmer -2025-08-23 09:30:29: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-23 09:31:43: DONE -==== query report: -num_kmers = 461383839 -num_positive_kmers = 293470517 (63.6066%) -num_negative_kmers = 167902332 (36.391%) -num_invalid_kmers = 10990 (0.00238196%) -num_searches = 149842881/293470517 (51.0589%) -num_extensions = 143627636/293470517 (48.9411%) -elapsed = 74173.7 millisec / 74.1737 sec / 1.23623 min / 160.764 ns/kmer -2025-08-23 09:31:44: performing queries from file '/home/giulio/sshash_queries/SRR5833294.fastq.gz'... 
-2025-08-23 09:34:26: DONE -==== query report: -num_kmers = 477818474 -num_positive_kmers = 406484348 (85.0709%) -num_negative_kmers = 70660348 (14.7881%) -num_invalid_kmers = 673778 (0.141011%) -num_searches = 220326145/406484348 (54.2029%) -num_extensions = 186158203/406484348 (45.7971%) -elapsed = 162745 millisec / 162.745 sec / 2.71242 min / 340.6 ns/kmer diff --git a/benchmarks/results-22-08-25/k63/22-08-25.regular.low-hit.streaming_query_log b/benchmarks/results-22-08-25/k63/22-08-25.regular.low-hit.streaming_query_log deleted file mode 100644 index 248d810..0000000 --- a/benchmarks/results-22-08-25/k63/22-08-25.regular.low-hit.streaming_query_log +++ /dev/null @@ -1,30 +0,0 @@ -2025-08-23 09:38:00: performing queries from file '/home/giulio/sshash_queries/SRR11449743_1.fastq.gz'... -2025-08-23 09:38:47: DONE -==== query report: -num_kmers = 461383839 -num_positive_kmers = 756097 (0.163876%) -num_negative_kmers = 460616752 (99.8337%) -num_invalid_kmers = 10990 (0.00238196%) -num_searches = 631375/756097 (83.5045%) -num_extensions = 124722/756097 (16.4955%) -elapsed = 47146.1 millisec / 47.1461 sec / 0.785768 min / 102.184 ns/kmer -2025-08-23 09:38:47: performing queries from file '/home/giulio/sshash_queries/SRR12858649.fastq.gz'... -2025-08-23 09:38:53: DONE -==== query report: -num_kmers = 97972416 -num_positive_kmers = 41066 (0.0419159%) -num_negative_kmers = 97931350 (99.9581%) -num_invalid_kmers = 0 (0%) -num_searches = 39909/41066 (97.1826%) -num_extensions = 1157/41066 (2.81742%) -elapsed = 5930.07 millisec / 5.93008 sec / 0.0988346 min / 60.528 ns/kmer -2025-08-23 09:38:54: performing queries from file '/home/giulio/sshash_queries/SRR5901135_1.fastq.gz'... 
-2025-08-23 09:39:25: DONE -==== query report: -num_kmers = 322085785 -num_positive_kmers = 8 (2.48381e-06%) -num_negative_kmers = 322073557 (99.9962%) -num_invalid_kmers = 12220 (0.00379402%) -num_searches = 7/8 (87.5%) -num_extensions = 1/8 (12.5%) -elapsed = 30772.8 millisec / 30.7728 sec / 0.512881 min / 95.5424 ns/kmer diff --git a/benchmarks/results-22-08-25/results.png b/benchmarks/results-22-08-25/results.png deleted file mode 100644 index bc4ec17..0000000 Binary files a/benchmarks/results-22-08-25/results.png and /dev/null differ diff --git a/benchmarks/results-27-11-25-v3/k31/canon-bench.log b/benchmarks/results-27-11-25-v3/k31/canon-bench.log new file mode 100644 index 0000000..7be1405 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/canon-bench.log @@ -0,0 +1,108 @@ +avg_nanosec_per_positive_lookup 700.616 +avg_nanosec_per_negative_lookup 626.943 +avg_nanosec_per_positive_lookup_advanced 690.957 +avg_nanosec_per_negative_lookup_advanced 623.486 +avg_nanosec_per_access 289.502 +iterator: avg_nanosec_per_kmer 13.5817 +avg_nanosec_per_positive_lookup 704.192 +avg_nanosec_per_negative_lookup 633.771 +avg_nanosec_per_positive_lookup_advanced 697.2 +avg_nanosec_per_negative_lookup_advanced 631.435 +avg_nanosec_per_access 294.007 +iterator: avg_nanosec_per_kmer 13.7049 +avg_nanosec_per_positive_lookup 690.87 +avg_nanosec_per_negative_lookup 629.068 +avg_nanosec_per_positive_lookup_advanced 699.277 +avg_nanosec_per_negative_lookup_advanced 620.572 +avg_nanosec_per_access 286.391 +iterator: avg_nanosec_per_kmer 13.9873 +avg_nanosec_per_positive_lookup 711.757 +avg_nanosec_per_negative_lookup 704.801 +avg_nanosec_per_positive_lookup_advanced 712.397 +avg_nanosec_per_negative_lookup_advanced 704.947 +avg_nanosec_per_access 270.911 +iterator: avg_nanosec_per_kmer 14.1877 +avg_nanosec_per_positive_lookup 712.672 +avg_nanosec_per_negative_lookup 701.529 +avg_nanosec_per_positive_lookup_advanced 712 +avg_nanosec_per_negative_lookup_advanced 701.063 
+avg_nanosec_per_access 266.977 +iterator: avg_nanosec_per_kmer 13.4956 +avg_nanosec_per_positive_lookup 710.204 +avg_nanosec_per_negative_lookup 698.747 +avg_nanosec_per_positive_lookup_advanced 722.988 +avg_nanosec_per_negative_lookup_advanced 696.887 +avg_nanosec_per_access 262.977 +iterator: avg_nanosec_per_kmer 15.5531 +avg_nanosec_per_positive_lookup 942.956 +avg_nanosec_per_negative_lookup 849.007 +avg_nanosec_per_positive_lookup_advanced 952.594 +avg_nanosec_per_negative_lookup_advanced 838.399 +avg_nanosec_per_access 367.911 +iterator: avg_nanosec_per_kmer 13.7541 +avg_nanosec_per_positive_lookup 945.996 +avg_nanosec_per_negative_lookup 836.111 +avg_nanosec_per_positive_lookup_advanced 943.087 +avg_nanosec_per_negative_lookup_advanced 836.9 +avg_nanosec_per_access 366.92 +iterator: avg_nanosec_per_kmer 13.5471 +avg_nanosec_per_positive_lookup 938.95 +avg_nanosec_per_negative_lookup 836.566 +avg_nanosec_per_positive_lookup_advanced 941.974 +avg_nanosec_per_negative_lookup_advanced 841.379 +avg_nanosec_per_access 372.83 +iterator: avg_nanosec_per_kmer 13.6391 +avg_nanosec_per_positive_lookup 596.857 +avg_nanosec_per_negative_lookup 599.08 +avg_nanosec_per_positive_lookup_advanced 595.354 +avg_nanosec_per_negative_lookup_advanced 592.708 +avg_nanosec_per_access 281.541 +iterator: avg_nanosec_per_kmer 13.6549 +avg_nanosec_per_positive_lookup 604.474 +avg_nanosec_per_negative_lookup 604.22 +avg_nanosec_per_positive_lookup_advanced 601.334 +avg_nanosec_per_negative_lookup_advanced 592.762 +avg_nanosec_per_access 285.191 +iterator: avg_nanosec_per_kmer 13.5116 +avg_nanosec_per_positive_lookup 588.148 +avg_nanosec_per_negative_lookup 599.255 +avg_nanosec_per_positive_lookup_advanced 593.973 +avg_nanosec_per_negative_lookup_advanced 606.877 +avg_nanosec_per_access 286.88 +iterator: avg_nanosec_per_kmer 13.6461 +avg_nanosec_per_positive_lookup 1029.48 +avg_nanosec_per_negative_lookup 853.139 +avg_nanosec_per_positive_lookup_advanced 1026.21 
+avg_nanosec_per_negative_lookup_advanced 851.519 +avg_nanosec_per_access 388.951 +iterator: avg_nanosec_per_kmer 14.9005 +avg_nanosec_per_positive_lookup 1021.87 +avg_nanosec_per_negative_lookup 855.54 +avg_nanosec_per_positive_lookup_advanced 1023 +avg_nanosec_per_negative_lookup_advanced 873.386 +avg_nanosec_per_access 390.695 +iterator: avg_nanosec_per_kmer 13.9961 +avg_nanosec_per_positive_lookup 1023.94 +avg_nanosec_per_negative_lookup 851.869 +avg_nanosec_per_positive_lookup_advanced 1029.24 +avg_nanosec_per_negative_lookup_advanced 856.582 +avg_nanosec_per_access 385.955 +iterator: avg_nanosec_per_kmer 14.0033 +avg_nanosec_per_positive_lookup 1328.29 +avg_nanosec_per_negative_lookup 1137.21 +avg_nanosec_per_positive_lookup_advanced 1317.02 +avg_nanosec_per_negative_lookup_advanced 1112.24 +avg_nanosec_per_access 602.465 +iterator: avg_nanosec_per_kmer 13.713 +avg_nanosec_per_positive_lookup 1320.87 +avg_nanosec_per_negative_lookup 1134.09 +avg_nanosec_per_positive_lookup_advanced 1332.24 +avg_nanosec_per_negative_lookup_advanced 1123.15 +avg_nanosec_per_access 601.874 +iterator: avg_nanosec_per_kmer 13.7209 +avg_nanosec_per_positive_lookup 1317.3 +avg_nanosec_per_negative_lookup 1117.27 +avg_nanosec_per_positive_lookup_advanced 1316.38 +avg_nanosec_per_negative_lookup_advanced 1123.79 +avg_nanosec_per_access 603.53 +iterator: avg_nanosec_per_kmer 13.6625 diff --git a/benchmarks/results-27-11-25-v3/k31/canon-build.log b/benchmarks/results-27-11-25-v3/k31/canon-build.log new file mode 100644 index 0000000..d2d356f --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/canon-build.log @@ -0,0 +1,1623 @@ +k = 31, m = 20, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 25039667 bases, 22039697 kmers +read 200000 sequences, 50140708 bases, 44140738 kmers +read 300000 sequences, 75429441 bases, 66429471 kmers +read 400000 sequences, 100861228 bases, 88861258 kmers +read 500000 sequences, 126668305 bases, 111668335 kmers +read 600000 sequences, 152842148 bases, 134842178 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278659621105956.minimizers.0.bin'... +read 700000 sequences, 179047050 bases, 158047080 kmers +read 800000 sequences, 205700376 bases, 181700406 kmers +read 900000 sequences, 232873950 bases, 205873980 kmers +read 1000000 sequences, 260757565 bases, 230757595 kmers +read 1100000 sequences, 290088622 bases, 257088652 kmers +read 1200000 sequences, 322579647 bases, 286579677 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278659621105956.minimizers.1.bin'... +read 1300000 sequences, 361073626 bases, 322073656 kmers +read 1400000 sequences, 398961301 bases, 356961331 kmers +read 1500000 sequences, 424322286 bases, 379322316 kmers +read 1600000 sequences, 449411932 bases, 401411962 kmers +read 1700000 sequences, 474428195 bases, 423428225 kmers +read 1800000 sequences, 499637061 bases, 445637091 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278659621105956.minimizers.2.bin'... +read 1900000 sequences, 524718926 bases, 467718956 kmers +read 2000000 sequences, 549832029 bases, 489832059 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278659621105956.minimizers.3.bin'... 
+read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +num_super_kmers 99045246 +num_pieces 2057243 (+0.245658 [bits/kmer]) +=== step 1: 'parse_file' 35.8195 [sec] (71.2875 [ns/kmer]) + == files to merge = 4 +num_written_tuples = 50000000 +num_written_tuples = 99045246 +num_minimizers 86909212 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 6.09538 [sec] (12.1309 [ns/kmer]) +bits_per_offset = ceil(log2(564182491)) = 30 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278701636967223.bucket_pairs.0.bin'... +num_singletons 80440205/86909212 (92.5566%) +=== step 3: 'build_index' 17.8435 [sec] (35.512 [ns/kmer]) +max_num_super_kmers_in_bucket 22004 +log2_max_num_super_kmers_in_bucket 15 +num_buckets_in_skew_index 6151/86909212 (0.0070775%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 1731813 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 1148051 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 939482 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 749103 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 516253 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 400682 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 22004: 574096 +num_kmers_in_skew_index 6059480 (1.20595%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 1731813 + building MPHF with 64 threads and 86 partitions... + built mphs[0] for 1731813 keys; bits/key = 3.10989 + built positions[0] for 1731813 keys; bits/key = 7.00022 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 1148051 + building MPHF with 64 threads and 57 partitions... 
+ built mphs[1] for 1148051 keys; bits/key = 3.01227 + built positions[1] for 1148051 keys; bits/key = 8.00031 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 939482 + building MPHF with 64 threads and 46 partitions... + built mphs[2] for 939482 keys; bits/key = 2.98842 + built positions[2] for 939482 keys; bits/key = 9.00036 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 749103 + building MPHF with 64 threads and 37 partitions... + built mphs[3] for 749103 keys; bits/key = 2.98093 + built positions[3] for 749103 keys; bits/key = 10.0005 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 516253 + building MPHF with 64 threads and 25 partitions... + built mphs[4] for 516253 keys; bits/key = 2.95018 + built positions[4] for 516253 keys; bits/key = 11.0006 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 400682 + building MPHF with 64 threads and 20 partitions... + built mphs[5] for 400682 keys; bits/key = 2.95939 + built positions[5] for 400682 keys; bits/key = 12.0008 + lower 4096; upper 22004; num_bits_per_pos 15; keys_in_partition.size() 574096 + building MPHF with 64 threads and 28 partitions... 
+ built mphs[6] for 574096 keys; bits/key = 2.95948 + built positions[6] for 574096 keys; bits/key = 15.0006 +num_bits_for_skew_index 74646912(0.148561 [bits/kmer]) +=== step 4: 'build_skew_index' 0.902275 [sec] (1.7957 [ns/kmer]) +=== total_time 60.6607 [sec] (120.726 [ns/kmer]) +total index size: 572200160 [B] -- 572.2 [MB] +SPACE BREAKDOWN: + minimizers: 0.462082 [bits/kmer] (2.67152 [bits/key]) -- 5.07209% + pieces: 0.0460074 [bits/kmer] -- 0.505005% + num_super_kmers_before_bucket: 0.294414 [bits/kmer] -- 3.23167% + offsets: 5.91356 [bits/kmer] -- 64.9108% + strings: 2.24566 [bits/kmer] -- 24.6497% + skew_index: 0.148561 [bits/kmer] -- 1.6307% + weights: 2.92956e-06 [bits/kmer] -- 3.21566e-05% + weight_interval_values: 5.09488e-07 [bits/kmer] + weight_interval_lengths: 1.91058e-06 [bits/kmer] + weight_dictionary: 5.09488e-07 [bits/kmer] + -------------- + total: 9.11029 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 92.5566% +buckets with 2 super_kmers = 5.94664% +buckets with 3 super_kmers = 0.722249% +buckets with 4 super_kmers = 0.278271% +buckets with 5 super_kmers = 0.141909% +buckets with 6 super_kmers = 0.084189% +buckets with 7 super_kmers = 0.054336% +buckets with 8 super_kmers = 0.037752% +buckets with 9 super_kmers = 0.0277301% +buckets with 10 super_kmers = 0.0212797% +buckets with 11 super_kmers = 0.0166691% +buckets with 12 super_kmers = 0.0132437% +buckets with 13 super_kmers = 0.0110966% +buckets with 14 super_kmers = 0.00920041% +buckets with 15 super_kmers = 0.00754465% +buckets with 16 super_kmers = 0.00658964% +max_num_super_kmers_in_bucket 22004 +2025-11-27 22:25:20: saving data structure to disk... +2025-11-27 22:25:20: DONE +k = 31, m = 20, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +m_buffer_size 29411764 +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.0.bin'... +read 100000 sequences, 213090615 bases, 210090645 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.1.bin'... +read 200000 sequences, 390706022 bases, 384706052 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.2.bin'... +read 300000 sequences, 575071881 bases, 566071911 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.3.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.4.bin'... +read 400000 sequences, 764532455 bases, 752532485 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.5.bin'... +read 500000 sequences, 971034152 bases, 956034182 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.6.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278720874064879.minimizers.7.bin'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +num_super_kmers 225292355 +num_pieces 582861 (+0.0303996 [bits/kmer]) +=== step 1: 'parse_file' 80.4675 [sec] (69.9475 [ns/kmer]) + == files to merge = 8 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 225292355 +num_minimizers 212008472 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 15.8973 [sec] (13.8189 [ns/kmer]) +bits_per_offset = ceil(log2(1167885036)) = 31 +m_buffer_size 20833333 +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278817502740446.bucket_pairs.0.bin'... +num_singletons 201072056/212008472 (94.8415%) +=== step 3: 'build_index' 56.2443 [sec] (48.8911 [ns/kmer]) +max_num_super_kmers_in_bucket 7372 +log2_max_num_super_kmers_in_bucket 13 +num_buckets_in_skew_index 2668/212008472 (0.00125844%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 861838 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 486191 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 276778 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 158755 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 86161 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 15153 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 7372: 37685 +num_kmers_in_skew_index 1922561 (0.167121%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 861838 + building MPHF with 64 threads and 43 partitions... + built mphs[0] for 861838 keys; bits/key = 2.98449 + built positions[0] for 861838 keys; bits/key = 7.00041 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 486191 + building MPHF with 64 threads and 24 partitions... + built mphs[1] for 486191 keys; bits/key = 2.95601 + built positions[1] for 486191 keys; bits/key = 8.00067 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 276778 + building MPHF with 64 threads and 13 partitions... + built mphs[2] for 276778 keys; bits/key = 2.90601 + built positions[2] for 276778 keys; bits/key = 9.00118 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 158755 + building MPHF with 64 threads and 7 partitions... 
+ built mphs[3] for 158755 keys; bits/key = 2.86308 + built positions[3] for 158755 keys; bits/key = 10.0022 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 86161 + building MPHF with 64 threads and 4 partitions... + built mphs[4] for 86161 keys; bits/key = 2.9216 + built positions[4] for 86161 keys; bits/key = 11.0038 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 15153 + building MPHF with 64 threads and 1 partitions... + built mphs[5] for 15153 keys; bits/key = 2.98924 + built positions[5] for 15153 keys; bits/key = 12.0245 + lower 4096; upper 7372; num_bits_per_pos 13; keys_in_partition.size() 37685 + building MPHF with 64 threads and 1 partitions... + built mphs[6] for 37685 keys; bits/key = 2.74274 + built positions[6] for 37685 keys; bits/key = 13.0089 +num_bits_for_skew_index 21291600(0.018508 [bits/kmer]) +=== step 4: 'build_skew_index' 1.03843 [sec] (0.902674 [ns/kmer]) +=== total_time 153.648 [sec] (133.56 [ns/kmer]) +total index size: 1284336368 [B] -- 1284.34 [MB] +SPACE BREAKDOWN: + minimizers: 0.504602 [bits/kmer] (2.73807 [bits/key]) -- 5.64974% + pieces: 0.00740836 [bits/kmer] -- 0.0829473% + num_super_kmers_before_bucket: 0.299503 [bits/kmer] -- 3.35337% + offsets: 6.07099 [bits/kmer] -- 67.9735% + strings: 2.0304 [bits/kmer] -- 22.7332% + skew_index: 0.018508 [bits/kmer] -- 0.207224% + weights: 1.27956e-06 [bits/kmer] -- 1.43265e-05% + weight_interval_values: 2.22531e-07 [bits/kmer] + weight_interval_lengths: 8.34493e-07 [bits/kmer] + weight_dictionary: 2.22531e-07 [bits/kmer] + -------------- + total: 8.93141 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 94.8415% +buckets with 2 super_kmers = 4.84071% +buckets with 3 super_kmers = 0.187797% +buckets with 4 super_kmers = 0.0463713% +buckets with 5 super_kmers = 0.0230014% +buckets with 6 super_kmers = 0.0138645% +buckets with 7 super_kmers = 0.00932793% +buckets with 8 super_kmers = 0.00652804% +buckets with 9 
super_kmers = 0.00484226% +buckets with 10 super_kmers = 0.00370363% +buckets with 11 super_kmers = 0.00290932% +buckets with 12 super_kmers = 0.00224755% +buckets with 13 super_kmers = 0.00189285% +buckets with 14 super_kmers = 0.00158814% +buckets with 15 super_kmers = 0.00133438% +buckets with 16 super_kmers = 0.00109854% +max_num_super_kmers_in_bucket 7372 +2025-11-27 22:27:54: saving data structure to disk... +2025-11-27 22:27:55: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 24154188 bases, 21154218 kmers +read 200000 sequences, 48616692 bases, 42616722 kmers +read 300000 sequences, 73131027 bases, 64131057 kmers +read 400000 sequences, 97783723 bases, 85783753 kmers +read 500000 sequences, 122219519 bases, 107219549 kmers +read 600000 sequences, 146714842 bases, 128714872 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.0.bin'... +read 700000 sequences, 171233673 bases, 150233703 kmers +read 800000 sequences, 195693297 bases, 171693327 kmers +read 900000 sequences, 220477596 bases, 193477626 kmers +read 1000000 sequences, 245136480 bases, 215136510 kmers +read 1100000 sequences, 269861488 bases, 236861518 kmers +read 1200000 sequences, 294553400 bases, 258553430 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.1.bin'... +read 1300000 sequences, 319281499 bases, 280281529 kmers +read 1400000 sequences, 344258826 bases, 302258856 kmers +read 1500000 sequences, 368900321 bases, 323900351 kmers +read 1600000 sequences, 393909637 bases, 345909667 kmers +read 1700000 sequences, 418576764 bases, 367576794 kmers +read 1800000 sequences, 443411012 bases, 389411042 kmers +read 1900000 sequences, 468034254 bases, 411034284 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.2.bin'... +read 2000000 sequences, 492922181 bases, 432922211 kmers +read 2100000 sequences, 517852856 bases, 454852886 kmers +read 2200000 sequences, 542402258 bases, 476402288 kmers +read 2300000 sequences, 567260857 bases, 498260887 kmers +read 2400000 sequences, 592478462 bases, 520478492 kmers +read 2500000 sequences, 617295027 bases, 542295057 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.3.bin'... +read 2600000 sequences, 642487995 bases, 564488025 kmers +read 2700000 sequences, 667681489 bases, 586681519 kmers +read 2800000 sequences, 693019221 bases, 609019251 kmers +read 2900000 sequences, 718419082 bases, 631419112 kmers +read 3000000 sequences, 743136645 bases, 653136675 kmers +read 3100000 sequences, 768135598 bases, 675135628 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.4.bin'... +read 3200000 sequences, 793535023 bases, 697535053 kmers +read 3300000 sequences, 819156616 bases, 720156646 kmers +read 3400000 sequences, 844256779 bases, 742256809 kmers +read 3500000 sequences, 869741880 bases, 764741910 kmers +read 3600000 sequences, 895152151 bases, 787152181 kmers +read 3700000 sequences, 920884314 bases, 809884344 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.5.bin'... +read 3800000 sequences, 946277252 bases, 832277282 kmers +read 3900000 sequences, 972103084 bases, 855103114 kmers +read 4000000 sequences, 997901794 bases, 877901824 kmers +read 4100000 sequences, 1023962565 bases, 900962595 kmers +read 4200000 sequences, 1050002905 bases, 924002935 kmers +read 4300000 sequences, 1076025926 bases, 947025956 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.6.bin'... +read 4400000 sequences, 1101901550 bases, 969901580 kmers +read 4500000 sequences, 1127998210 bases, 992998240 kmers +read 4600000 sequences, 1153713252 bases, 1015713282 kmers +read 4700000 sequences, 1179840867 bases, 1038840897 kmers +read 4800000 sequences, 1205900933 bases, 1061900963 kmers +read 4900000 sequences, 1232271094 bases, 1085271124 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.7.bin'... +read 5000000 sequences, 1259012297 bases, 1109012327 kmers +read 5100000 sequences, 1285390407 bases, 1132390437 kmers +read 5200000 sequences, 1312574199 bases, 1156574229 kmers +read 5300000 sequences, 1339714447 bases, 1180714477 kmers +read 5400000 sequences, 1366712530 bases, 1204712560 kmers +read 5500000 sequences, 1394310431 bases, 1229310461 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.8.bin'... +read 5600000 sequences, 1421869864 bases, 1253869894 kmers +read 5700000 sequences, 1449547608 bases, 1278547638 kmers +read 5800000 sequences, 1477685978 bases, 1303686008 kmers +read 5900000 sequences, 1505662869 bases, 1328662899 kmers +read 6000000 sequences, 1534165192 bases, 1354165222 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.9.bin'... +read 6100000 sequences, 1562404847 bases, 1379404877 kmers +read 6200000 sequences, 1591527035 bases, 1405527065 kmers +read 6300000 sequences, 1620530378 bases, 1431530408 kmers +read 6400000 sequences, 1650356135 bases, 1458356165 kmers +read 6500000 sequences, 1680100604 bases, 1485100634 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.10.bin'... 
+read 6600000 sequences, 1709838161 bases, 1511838191 kmers +read 6700000 sequences, 1739768824 bases, 1538768854 kmers +read 6800000 sequences, 1771033237 bases, 1567033267 kmers +read 6900000 sequences, 1802734155 bases, 1595734185 kmers +read 7000000 sequences, 1835088122 bases, 1625088152 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.11.bin'... +read 7100000 sequences, 1868202990 bases, 1655203020 kmers +read 7200000 sequences, 1901851887 bases, 1685851917 kmers +read 7300000 sequences, 1936102636 bases, 1717102666 kmers +read 7400000 sequences, 1971764013 bases, 1749764043 kmers +read 7500000 sequences, 2008379618 bases, 1783379648 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.12.bin'... +read 7600000 sequences, 2046720278 bases, 1818720308 kmers +read 7700000 sequences, 2086661084 bases, 1855661114 kmers +read 7800000 sequences, 2129062919 bases, 1895062949 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.13.bin'... +read 7900000 sequences, 2174787944 bases, 1937787974 kmers +read 8000000 sequences, 2224822145 bases, 1984822175 kmers +read 8100000 sequences, 2283235897 bases, 2040235927 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.14.bin'... +read 8200000 sequences, 2311905284 bases, 2065905314 kmers +read 8300000 sequences, 2336057180 bases, 2087057210 kmers +read 8400000 sequences, 2360509664 bases, 2108509694 kmers +read 8500000 sequences, 2384831256 bases, 2129831286 kmers +read 8600000 sequences, 2409417232 bases, 2151417262 kmers +read 8700000 sequences, 2433616800 bases, 2172616830 kmers +read 8800000 sequences, 2458186051 bases, 2194186081 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.15.bin'... +read 8900000 sequences, 2482411631 bases, 2215411661 kmers +read 9000000 sequences, 2506972327 bases, 2236972357 kmers +read 9100000 sequences, 2531678245 bases, 2258678275 kmers +read 9200000 sequences, 2555989557 bases, 2279989587 kmers +read 9300000 sequences, 2580569574 bases, 2301569604 kmers +read 9400000 sequences, 2605362813 bases, 2323362843 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.16.bin'... +read 9500000 sequences, 2629786923 bases, 2344786953 kmers +read 9600000 sequences, 2654330633 bases, 2366330663 kmers +read 9700000 sequences, 2679002198 bases, 2388002228 kmers +read 9800000 sequences, 2703471555 bases, 2409471585 kmers +read 9900000 sequences, 2727977401 bases, 2430977431 kmers +read 10000000 sequences, 2751909350 bases, 2451909380 kmers +read 10100000 sequences, 2776332527 bases, 2473332557 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.17.bin'... +read 10200000 sequences, 2800726002 bases, 2494726032 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278875702441678.minimizers.18.bin'... 
+read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +num_super_kmers 535721942 +num_pieces 10250466 (+0.245454 [bits/kmer]) +=== step 1: 'parse_file' 172.811 [sec] (68.9679 [ns/kmer]) + == files to merge = 19 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 450000000 +num_written_tuples = 500000000 +num_written_tuples = 535721942 +num_minimizers 465919362 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 39.4847 [sec] (15.7581 [ns/kmer]) +bits_per_offset = ceil(log2(2813192661)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279088618654493.bucket_pairs.0.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279088618654493.bucket_pairs.1.bin'... +num_singletons 433242759/465919362 (92.9866%) + == files to merge = 2 +num_written_pairs = 32676603 +=== step 3: 'build_index' 141.348 [sec] (56.4112 [ns/kmer]) +max_num_super_kmers_in_bucket 32249 +log2_max_num_super_kmers_in_bucket 15 +num_buckets_in_skew_index 63334/465919362 (0.0135933%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 15191124 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 11100430 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 7610580 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 4687038 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 3133730 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1594207 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 32249: 1243030 +num_kmers_in_skew_index 44560139 (1.77837%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 15191124 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 15191124 keys; bits/key = 2.94094 + built positions[0] for 15191124 keys; bits/key = 7.00002 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 11100430 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 11100430 keys; bits/key = 2.99917 + built positions[1] for 11100430 keys; bits/key = 8.00003 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 7610580 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 7610580 keys; bits/key = 3.05914 + built positions[2] for 7610580 keys; bits/key = 9.00004 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 4687038 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 4687038 keys; bits/key = 3.21486 + built positions[3] for 4687038 keys; bits/key = 10.0001 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 3133730 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[4] for 3133730 keys; bits/key = 3.32063 + built positions[4] for 3133730 keys; bits/key = 11.0001 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 1594207 + building MPHF with 64 threads and 79 partitions... + built mphs[5] for 1594207 keys; bits/key = 3.09712 + built positions[5] for 1594207 keys; bits/key = 12.0002 + lower 4096; upper 32249; num_bits_per_pos 15; keys_in_partition.size() 1243030 + building MPHF with 64 threads and 62 partitions... + built mphs[6] for 1243030 keys; bits/key = 3.0308 + built positions[6] for 1243030 keys; bits/key = 15.0003 +num_bits_for_skew_index 518185552(0.206804 [bits/kmer]) +=== step 4: 'build_skew_index' 6.26459 [sec] (2.50016 [ns/kmer]) +=== total_time 359.909 [sec] (143.637 [ns/kmer]) +total index size: 3180099170 [B] -- 3180.1 [MB] +SPACE BREAKDOWN: + minimizers: 0.494925 [bits/kmer] (2.66167 [bits/key]) -- 4.87455% + pieces: 0.0459722 [bits/kmer] -- 0.452783% + num_super_kmers_before_bucket: 0.318398 [bits/kmer] -- 3.13592% + offsets: 6.8417 [bits/kmer] -- 67.3843% + strings: 2.24545 [bits/kmer] -- 22.1156% + skew_index: 0.206804 [bits/kmer] -- 2.03683% + weights: 5.87466e-07 [bits/kmer] -- 5.78598e-06% + weight_interval_values: 1.02168e-07 [bits/kmer] + weight_interval_lengths: 3.8313e-07 [bits/kmer] + weight_dictionary: 1.02168e-07 [bits/kmer] + -------------- + total: 10.1533 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 92.9866% +buckets with 2 super_kmers = 5.6374% +buckets with 3 super_kmers = 0.597685% +buckets with 4 super_kmers = 0.239291% +buckets with 5 super_kmers = 0.131087% +buckets with 6 super_kmers = 0.0829399% +buckets with 7 super_kmers = 0.0571751% +buckets with 8 super_kmers = 0.0415825% +buckets with 9 super_kmers = 0.0314344% +buckets with 10 super_kmers = 0.0244089% +buckets with 11 super_kmers = 0.0195858% +buckets with 12 super_kmers = 0.0160863% +buckets with 13 super_kmers = 0.0133457% +buckets with 14 super_kmers = 0.0112569% 
+buckets with 15 super_kmers = 0.00953792% +buckets with 16 super_kmers = 0.00818575% +max_num_super_kmers_in_bucket 32249 +2025-11-27 22:33:56: saving data structure to disk... +2025-11-27 22:33:58: DONE +k = 31, m = 19, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 25244517 bases, 22244547 kmers +read 200000 sequences, 51983063 bases, 45983093 kmers +read 300000 sequences, 83502239 bases, 74502269 kmers +read 400000 sequences, 125957954 bases, 113957984 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279238485636807.minimizers.0.bin'... +read 500000 sequences, 196376539 bases, 181376569 kmers +read 600000 sequences, 214563709 bases, 196563739 kmers +read 700000 sequences, 233311986 bases, 212312016 kmers +read 800000 sequences, 251082581 bases, 227082611 kmers +read 900000 sequences, 269982481 bases, 242982511 kmers +read 1000000 sequences, 289026734 bases, 259026764 kmers +read 1100000 sequences, 308418289 bases, 275418319 kmers +read 1200000 sequences, 328185212 bases, 292185242 kmers +read 1300000 sequences, 349175412 bases, 310175442 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279238485636807.minimizers.1.bin'... +read 1400000 sequences, 370064780 bases, 328064810 kmers +read 1500000 sequences, 391112878 bases, 346112908 kmers +read 1600000 sequences, 413509635 bases, 365509665 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279238485636807.minimizers.2.bin'... 
+read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +num_super_kmers 70028206 +num_pieces 1645465 (+0.262431 [bits/kmer]) +=== step 1: 'parse_file' 27.9652 [sec] (74.335 [ns/kmer]) + == files to merge = 3 +num_written_tuples = 50000000 +num_written_tuples = 70028206 +num_minimizers 62055494 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 4.32492 [sec] (11.4962 [ns/kmer]) +bits_per_offset = ceil(log2(425569136)) = 29 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279270850453237.bucket_pairs.0.bin'... +num_singletons 55876049/62055494 (90.0421%) +=== step 3: 'build_index' 11.3486 [sec] (30.1659 [ns/kmer]) +max_num_super_kmers_in_bucket 311 +log2_max_num_super_kmers_in_bucket 9 +num_buckets_in_skew_index 33/62055494 (5.31782e-05%) +num_partitions 3 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 11429 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 4613 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 311: 4820 +num_kmers_in_skew_index 20862 (0.00554538%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 11429 + building MPHF with 64 threads and 1 partitions... + built mphs[0] for 11429 keys; bits/key = 3.11208 + built positions[0] for 11429 keys; bits/key = 7.03334 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 4613 + building MPHF with 64 threads and 1 partitions... + built mphs[1] for 4613 keys; bits/key = 3.79449 + built positions[1] for 4613 keys; bits/key = 8.07457 + lower 256; upper 311; num_bits_per_pos 9; keys_in_partition.size() 4820 + building MPHF with 64 threads and 1 partitions... 
+ built mphs[2] for 4820 keys; bits/key = 3.69793 + built positions[2] for 4820 keys; bits/key = 9.06888 +num_bits_for_skew_index 232432(0.000617833 [bits/kmer]) +=== step 4: 'build_skew_index' 0.308427 [sec] (0.819837 [ns/kmer]) +=== total_time 43.9471 [sec] (116.817 [ns/kmer]) +total index size: 396825922 [B] -- 396.826 [MB] +SPACE BREAKDOWN: + minimizers: 0.449615 [bits/kmer] (2.72575 [bits/key]) -- 5.32814% + pieces: 0.0487325 [bits/kmer] -- 0.577502% + num_super_kmers_before_bucket: 0.278932 [bits/kmer] -- 3.30547% + offsets: 5.39817 [bits/kmer] -- 63.9707% + strings: 2.26243 [bits/kmer] -- 26.8108% + skew_index: 0.000617833 [bits/kmer] -- 0.0073216% + weights: 3.91276e-06 [bits/kmer] -- 4.63679e-05% + weight_interval_values: 6.8048e-07 [bits/kmer] + weight_interval_lengths: 2.5518e-06 [bits/kmer] + weight_dictionary: 6.8048e-07 [bits/kmer] + -------------- + total: 8.4385 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 90.0421% +buckets with 2 super_kmers = 8.21444% +buckets with 3 super_kmers = 1.16879% +buckets with 4 super_kmers = 0.332915% +buckets with 5 super_kmers = 0.121902% +buckets with 6 super_kmers = 0.053631% +buckets with 7 super_kmers = 0.0266197% +buckets with 8 super_kmers = 0.0143613% +buckets with 9 super_kmers = 0.00850529% +buckets with 10 super_kmers = 0.00518085% +buckets with 11 super_kmers = 0.00316652% +buckets with 12 super_kmers = 0.00211585% +buckets with 13 super_kmers = 0.00149382% +buckets with 14 super_kmers = 0.0010265% +buckets with 15 super_kmers = 0.000747718% +buckets with 16 super_kmers = 0.000525336% +max_num_super_kmers_in_bucket 311 +2025-11-27 22:34:42: saving data structure to disk... +2025-11-27 22:34:42: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 8718376 bases, 5718406 kmers +read 200000 sequences, 17474036 bases, 11474066 kmers +read 300000 sequences, 26299388 bases, 17299418 kmers +read 400000 sequences, 35167021 bases, 23167051 kmers +read 500000 sequences, 43967859 bases, 28967889 kmers +read 600000 sequences, 52886411 bases, 34886441 kmers +read 700000 sequences, 61937165 bases, 40937195 kmers +read 800000 sequences, 71070675 bases, 47070705 kmers +read 900000 sequences, 80176820 bases, 53176850 kmers +read 1000000 sequences, 89525814 bases, 59525844 kmers +read 1100000 sequences, 98984767 bases, 65984797 kmers +read 1200000 sequences, 108719822 bases, 72719852 kmers +read 1300000 sequences, 118280750 bases, 79280780 kmers +read 1400000 sequences, 127917709 bases, 85917739 kmers +read 1500000 sequences, 137591502 bases, 92591532 kmers +read 1600000 sequences, 147395162 bases, 99395192 kmers +read 1700000 sequences, 157334953 bases, 106334983 kmers +read 1800000 sequences, 167444668 bases, 113444698 kmers +read 1900000 sequences, 177725512 bases, 120725542 kmers +read 2000000 sequences, 188052017 bases, 128052047 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.0.bin'... 
+read 2100000 sequences, 198499489 bases, 135499519 kmers +read 2200000 sequences, 209053217 bases, 143053247 kmers +read 2300000 sequences, 219847953 bases, 150847983 kmers +read 2400000 sequences, 230787134 bases, 158787164 kmers +read 2500000 sequences, 242014317 bases, 167014347 kmers +read 2600000 sequences, 253501939 bases, 175501969 kmers +read 2700000 sequences, 265108629 bases, 184108659 kmers +read 2800000 sequences, 277040099 bases, 193040129 kmers +read 2900000 sequences, 289406610 bases, 202406640 kmers +read 3000000 sequences, 302142147 bases, 212142177 kmers +read 3100000 sequences, 315168399 bases, 222168429 kmers +read 3200000 sequences, 329083022 bases, 233083052 kmers +read 3300000 sequences, 343507959 bases, 244507989 kmers +read 3400000 sequences, 358607940 bases, 256607970 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.1.bin'... +read 3500000 sequences, 374790281 bases, 269790311 kmers +read 3600000 sequences, 392123240 bases, 284123270 kmers +read 3700000 sequences, 410698110 bases, 299698140 kmers +read 3800000 sequences, 431358012 bases, 317358042 kmers +read 3900000 sequences, 454179419 bases, 337179449 kmers +read 4000000 sequences, 461236464 bases, 341236494 kmers +read 4100000 sequences, 468031488 bases, 345031518 kmers +read 4200000 sequences, 474889537 bases, 348889567 kmers +read 4300000 sequences, 481729647 bases, 352729677 kmers +read 4400000 sequences, 488721923 bases, 356721953 kmers +read 4500000 sequences, 495625017 bases, 360625047 kmers +read 4600000 sequences, 502596529 bases, 364596559 kmers +read 4700000 sequences, 509600468 bases, 368600498 kmers +read 4800000 sequences, 516484409 bases, 372484439 kmers +read 4900000 sequences, 523427172 bases, 376427202 kmers +read 5000000 sequences, 530371696 bases, 380371726 kmers +read 5100000 sequences, 537329533 bases, 384329563 kmers +read 5200000 sequences, 544209307 bases, 388209337 kmers +read 5300000 
sequences, 551189884 bases, 392189914 kmers +read 5400000 sequences, 558101859 bases, 396101889 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.2.bin'... +read 5500000 sequences, 565120102 bases, 400120132 kmers +read 5600000 sequences, 572157514 bases, 404157544 kmers +read 5700000 sequences, 579064805 bases, 408064835 kmers +read 5800000 sequences, 586002650 bases, 412002680 kmers +read 5900000 sequences, 592940870 bases, 415940900 kmers +read 6000000 sequences, 600036436 bases, 420036466 kmers +read 6100000 sequences, 607035741 bases, 424035771 kmers +read 6200000 sequences, 614208655 bases, 428208685 kmers +read 6300000 sequences, 621168185 bases, 432168215 kmers +read 6400000 sequences, 628072415 bases, 436072445 kmers +read 6500000 sequences, 635102553 bases, 440102583 kmers +read 6600000 sequences, 642120687 bases, 444120717 kmers +read 6700000 sequences, 649088919 bases, 448088949 kmers +read 6800000 sequences, 656101716 bases, 452101746 kmers +read 6900000 sequences, 663148824 bases, 456148854 kmers +read 7000000 sequences, 670072415 bases, 460072445 kmers +read 7100000 sequences, 677027793 bases, 464027823 kmers +read 7200000 sequences, 684172149 bases, 468172179 kmers +read 7300000 sequences, 691211136 bases, 472211166 kmers +read 7400000 sequences, 698208759 bases, 476208789 kmers +read 7500000 sequences, 705328918 bases, 480328948 kmers +read 7600000 sequences, 712360901 bases, 484360931 kmers +read 7700000 sequences, 719471743 bases, 488471773 kmers +read 7800000 sequences, 726605082 bases, 492605112 kmers +read 7900000 sequences, 733774553 bases, 496774583 kmers +read 8000000 sequences, 740830630 bases, 500830660 kmers +read 8100000 sequences, 747950142 bases, 504950172 kmers +read 8200000 sequences, 755058699 bases, 509058729 kmers +read 8300000 sequences, 762216366 bases, 513216396 kmers +read 8400000 sequences, 769353795 bases, 517353825 kmers +read 8500000 sequences, 
776583104 bases, 521583134 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.3.bin'... +read 8600000 sequences, 783816161 bases, 525816191 kmers +read 8700000 sequences, 791077309 bases, 530077339 kmers +read 8800000 sequences, 798157720 bases, 534157750 kmers +read 8900000 sequences, 805240984 bases, 538241014 kmers +read 9000000 sequences, 812530422 bases, 542530452 kmers +read 9100000 sequences, 819617660 bases, 546617690 kmers +read 9200000 sequences, 826809679 bases, 550809709 kmers +read 9300000 sequences, 833919076 bases, 554919106 kmers +read 9400000 sequences, 841155631 bases, 559155661 kmers +read 9500000 sequences, 848401634 bases, 563401664 kmers +read 9600000 sequences, 855643616 bases, 567643646 kmers +read 9700000 sequences, 862872646 bases, 571872676 kmers +read 9800000 sequences, 870171662 bases, 576171692 kmers +read 9900000 sequences, 877460842 bases, 580460872 kmers +read 10000000 sequences, 884692117 bases, 584692147 kmers +read 10100000 sequences, 891988881 bases, 588988911 kmers +read 10200000 sequences, 899332113 bases, 593332143 kmers +read 10300000 sequences, 906596087 bases, 597596117 kmers +read 10400000 sequences, 913914165 bases, 601914195 kmers +read 10500000 sequences, 921197132 bases, 606197162 kmers +read 10600000 sequences, 928621300 bases, 610621330 kmers +read 10700000 sequences, 935962513 bases, 614962543 kmers +read 10800000 sequences, 943225364 bases, 619225394 kmers +read 10900000 sequences, 950606220 bases, 623606250 kmers +read 11000000 sequences, 958222210 bases, 628222240 kmers +read 11100000 sequences, 965609577 bases, 632609607 kmers +read 11200000 sequences, 972999540 bases, 636999570 kmers +read 11300000 sequences, 980385943 bases, 641385973 kmers +read 11400000 sequences, 987756659 bases, 645756689 kmers +read 11500000 sequences, 995228579 bases, 650228609 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.4.bin'... +read 11600000 sequences, 1002715916 bases, 654715946 kmers +read 11700000 sequences, 1010163181 bases, 659163211 kmers +read 11800000 sequences, 1017603999 bases, 663604029 kmers +read 11900000 sequences, 1025150283 bases, 668150313 kmers +read 12000000 sequences, 1032736012 bases, 672736042 kmers +read 12100000 sequences, 1040213462 bases, 677213492 kmers +read 12200000 sequences, 1047864710 bases, 681864740 kmers +read 12300000 sequences, 1055370919 bases, 686370949 kmers +read 12400000 sequences, 1062882101 bases, 690882131 kmers +read 12500000 sequences, 1070457753 bases, 695457783 kmers +read 12600000 sequences, 1078056452 bases, 700056482 kmers +read 12700000 sequences, 1085619838 bases, 704619868 kmers +read 12800000 sequences, 1093220312 bases, 709220342 kmers +read 12900000 sequences, 1100937956 bases, 713937986 kmers +read 13000000 sequences, 1108501138 bases, 718501168 kmers +read 13100000 sequences, 1116117632 bases, 723117662 kmers +read 13200000 sequences, 1123791900 bases, 727791930 kmers +read 13300000 sequences, 1131456231 bases, 732456261 kmers +read 13400000 sequences, 1139167200 bases, 737167230 kmers +read 13500000 sequences, 1147084189 bases, 742084219 kmers +read 13600000 sequences, 1154903385 bases, 746903415 kmers +read 13700000 sequences, 1162719538 bases, 751719568 kmers +read 13800000 sequences, 1170515131 bases, 756515161 kmers +read 13900000 sequences, 1178310843 bases, 761310873 kmers +read 14000000 sequences, 1186158447 bases, 766158477 kmers +read 14100000 sequences, 1194143426 bases, 771143456 kmers +read 14200000 sequences, 1202037098 bases, 776037128 kmers +read 14300000 sequences, 1210016683 bases, 781016713 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.5.bin'... 
+read 14400000 sequences, 1217929646 bases, 785929676 kmers +read 14500000 sequences, 1226009490 bases, 791009520 kmers +read 14600000 sequences, 1233936479 bases, 795936509 kmers +read 14700000 sequences, 1241953750 bases, 800953780 kmers +read 14800000 sequences, 1249971870 bases, 805971900 kmers +read 14900000 sequences, 1258011905 bases, 811011935 kmers +read 15000000 sequences, 1266102845 bases, 816102875 kmers +read 15100000 sequences, 1274149642 bases, 821149672 kmers +read 15200000 sequences, 1282474792 bases, 826474822 kmers +read 15300000 sequences, 1290768894 bases, 831768924 kmers +read 15400000 sequences, 1299121736 bases, 837121766 kmers +read 15500000 sequences, 1307304137 bases, 842304167 kmers +read 15600000 sequences, 1315661695 bases, 847661725 kmers +read 15700000 sequences, 1324089803 bases, 853089833 kmers +read 15800000 sequences, 1332318556 bases, 858318586 kmers +read 15900000 sequences, 1340760519 bases, 863760549 kmers +read 16000000 sequences, 1349263730 bases, 869263760 kmers +read 16100000 sequences, 1357811657 bases, 874811687 kmers +read 16200000 sequences, 1366358116 bases, 880358146 kmers +read 16300000 sequences, 1375026989 bases, 886027019 kmers +read 16400000 sequences, 1383710154 bases, 891710184 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279282875766784.minimizers.6.bin'... +read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +num_super_kmers 201604301 +num_pieces 16440874 (+1.10303 [bits/kmer]) +=== step 1: 'parse_file' 65.3814 [sec] (73.1082 [ns/kmer]) + == files to merge = 7 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 201604301 +num_minimizers 144120499 +building minimizers MPHF with 64 threads and 256 partitions... 
+=== step 2: 'build_minimizers' 12.6663 [sec] (14.1632 [ns/kmer]) +bits_per_offset = ceil(log2(1387536305)) = 31 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279361141144604.bucket_pairs.0.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279361141144604.bucket_pairs.1.bin'... +num_singletons 121051637/144120499 (83.9934%) + == files to merge = 2 +num_written_pairs = 23068862 +=== step 3: 'build_index' 39.4047 [sec] (44.0616 [ns/kmer]) +max_num_super_kmers_in_bucket 84137 +log2_max_num_super_kmers_in_bucket 17 +num_buckets_in_skew_index 16248/144120499 (0.0112739%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 4477797 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 1948141 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 1144428 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 817502 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 598296 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 442321 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 84137: 911319 +num_kmers_in_skew_index 10339804 (1.15618%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 4477797 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 4477797 keys; bits/key = 3.22778 + built positions[0] for 4477797 keys; bits/key = 7.00009 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 1948141 + building MPHF with 64 threads and 97 partitions... 
+ built mphs[1] for 1948141 keys; bits/key = 3.13402 + built positions[1] for 1948141 keys; bits/key = 8.00018 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 1144428 + building MPHF with 64 threads and 57 partitions... + built mphs[2] for 1144428 keys; bits/key = 3.01254 + built positions[2] for 1144428 keys; bits/key = 9.00033 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 817502 + building MPHF with 64 threads and 40 partitions... + built mphs[3] for 817502 keys; bits/key = 2.97329 + built positions[3] for 817502 keys; bits/key = 10.0004 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 598296 + building MPHF with 64 threads and 29 partitions... + built mphs[4] for 598296 keys; bits/key = 2.95535 + built positions[4] for 598296 keys; bits/key = 11.0006 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 442321 + building MPHF with 64 threads and 22 partitions... + built mphs[5] for 442321 keys; bits/key = 2.96053 + built positions[5] for 442321 keys; bits/key = 12.0008 + lower 4096; upper 84137; num_bits_per_pos 17; keys_in_partition.size() 911319 + building MPHF with 64 threads and 45 partitions... 
+ built mphs[6] for 911319 keys; bits/key = 2.98769 + built positions[6] for 911319 keys; bits/key = 17.0004 +num_bits_for_skew_index 125026416(0.139802 [bits/kmer]) +=== step 4: 'build_skew_index' 1.81259 [sec] (2.0268 [ns/kmer]) +=== total_time 119.265 [sec] (133.36 [ns/kmer]) +total index size: 1246772410 [B] -- 1246.77 [MB] +SPACE BREAKDOWN: + minimizers: 0.428705 [bits/kmer] (2.66024 [bits/key]) -- 3.84388% + pieces: 0.176908 [bits/kmer] -- 1.5862% + num_super_kmers_before_bucket: 0.316153 [bits/kmer] -- 2.83471% + offsets: 6.98833 [bits/kmer] -- 62.6591% + strings: 3.10303 [bits/kmer] -- 27.8226% + skew_index: 0.139802 [bits/kmer] -- 1.2535% + weights: 1.64596e-06 [bits/kmer] -- 1.47581e-05% + weight_interval_values: 2.86254e-07 [bits/kmer] + weight_interval_lengths: 1.07345e-06 [bits/kmer] + weight_dictionary: 2.86254e-07 [bits/kmer] + -------------- + total: 11.1529 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 83.9934% +buckets with 2 super_kmers = 10.3451% +buckets with 3 super_kmers = 2.43817% +buckets with 4 super_kmers = 1.02502% +buckets with 5 super_kmers = 0.546504% +buckets with 6 super_kmers = 0.339% +buckets with 7 super_kmers = 0.232088% +buckets with 8 super_kmers = 0.170709% +buckets with 9 super_kmers = 0.132568% +buckets with 10 super_kmers = 0.106718% +buckets with 11 super_kmers = 0.088911% +buckets with 12 super_kmers = 0.075244% +buckets with 13 super_kmers = 0.0646494% +buckets with 14 super_kmers = 0.0560184% +buckets with 15 super_kmers = 0.049306% +buckets with 16 super_kmers = 0.0428447% +max_num_super_kmers_in_bucket 84137 +2025-11-27 22:36:42: saving data structure to disk... +2025-11-27 22:36:43: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 15142600 bases, 12142630 kmers +read 200000 sequences, 30291716 bases, 24291746 kmers +read 300000 sequences, 45689080 bases, 36689110 kmers +read 400000 sequences, 61242824 bases, 49242854 kmers +read 500000 sequences, 77204688 bases, 62204718 kmers +read 600000 sequences, 93322728 bases, 75322758 kmers +read 700000 sequences, 109580784 bases, 88580814 kmers +read 800000 sequences, 126019199 bases, 102019229 kmers +read 900000 sequences, 142771255 bases, 115771285 kmers +read 1000000 sequences, 159860280 bases, 129860310 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.0.bin'... +read 1100000 sequences, 177221889 bases, 144221919 kmers +read 1200000 sequences, 194614623 bases, 158614653 kmers +read 1300000 sequences, 212431367 bases, 173431397 kmers +read 1400000 sequences, 230443062 bases, 188443092 kmers +read 1500000 sequences, 248830724 bases, 203830754 kmers +read 1600000 sequences, 267495814 bases, 219495844 kmers +read 1700000 sequences, 286467424 bases, 235467454 kmers +read 1800000 sequences, 305974756 bases, 251974786 kmers +read 1900000 sequences, 325573452 bases, 268573482 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.1.bin'... +read 2000000 sequences, 345518992 bases, 285519022 kmers +read 2100000 sequences, 365932818 bases, 302932848 kmers +read 2200000 sequences, 386928554 bases, 320928584 kmers +read 2300000 sequences, 408196559 bases, 339196589 kmers +read 2400000 sequences, 429705966 bases, 357705996 kmers +read 2500000 sequences, 451663903 bases, 376663933 kmers +read 2600000 sequences, 474186039 bases, 396186069 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.2.bin'... 
+read 2700000 sequences, 497212102 bases, 416212132 kmers +read 2800000 sequences, 520661914 bases, 436661944 kmers +read 2900000 sequences, 544613770 bases, 457613800 kmers +read 3000000 sequences, 569210361 bases, 479210391 kmers +read 3100000 sequences, 594100200 bases, 501100230 kmers +read 3200000 sequences, 619323714 bases, 523323744 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.3.bin'... +read 3300000 sequences, 645628376 bases, 546628406 kmers +read 3400000 sequences, 672447793 bases, 570447823 kmers +read 3500000 sequences, 699905664 bases, 594905694 kmers +read 3600000 sequences, 728253444 bases, 620253474 kmers +read 3700000 sequences, 756996641 bases, 645996671 kmers +read 3800000 sequences, 786498161 bases, 672498191 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.4.bin'... +read 3900000 sequences, 816910234 bases, 699910264 kmers +read 4000000 sequences, 848332113 bases, 728332143 kmers +read 4100000 sequences, 880941119 bases, 757941149 kmers +read 4200000 sequences, 914418229 bases, 788418259 kmers +read 4300000 sequences, 948701080 bases, 819701110 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.5.bin'... +read 4400000 sequences, 984340067 bases, 852340097 kmers +read 4500000 sequences, 1021325774 bases, 886325804 kmers +read 4600000 sequences, 1059629391 bases, 921629421 kmers +read 4700000 sequences, 1098776183 bases, 957776213 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.6.bin'... +read 4800000 sequences, 1139816048 bases, 995816078 kmers +read 4900000 sequences, 1182502543 bases, 1035502573 kmers +read 5000000 sequences, 1226889874 bases, 1076889904 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.7.bin'... +read 5100000 sequences, 1272830961 bases, 1119830991 kmers +read 5200000 sequences, 1320724320 bases, 1164724350 kmers +read 5300000 sequences, 1371130682 bases, 1212130712 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.8.bin'... +read 5400000 sequences, 1423474687 bases, 1261474717 kmers +read 5500000 sequences, 1478532677 bases, 1313532707 kmers +read 5600000 sequences, 1536511057 bases, 1368511087 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.9.bin'... +read 5700000 sequences, 1597650497 bases, 1426650527 kmers +read 5800000 sequences, 1661331597 bases, 1487331627 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.10.bin'... +read 5900000 sequences, 1728788299 bases, 1551788329 kmers +read 6000000 sequences, 1800462732 bases, 1620462762 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.11.bin'... 
+read 6100000 sequences, 1855582708 bases, 1672582738 kmers +read 6200000 sequences, 1861290572 bases, 1675290602 kmers +read 6300000 sequences, 1866991959 bases, 1677991989 kmers +read 6400000 sequences, 1872712703 bases, 1680712733 kmers +read 6500000 sequences, 1878412742 bases, 1683412772 kmers +read 6600000 sequences, 1884126738 bases, 1686126768 kmers +read 6700000 sequences, 1889847765 bases, 1688847795 kmers +read 6800000 sequences, 1895565065 bases, 1691565095 kmers +read 6900000 sequences, 1901268580 bases, 1694268610 kmers +read 7000000 sequences, 1906975331 bases, 1696975361 kmers +read 7100000 sequences, 1912694936 bases, 1699694966 kmers +read 7200000 sequences, 1918405386 bases, 1702405416 kmers +read 7300000 sequences, 1924111712 bases, 1705111742 kmers +read 7400000 sequences, 1929830119 bases, 1707830149 kmers +read 7500000 sequences, 1935539974 bases, 1710540004 kmers +read 7600000 sequences, 1941266098 bases, 1713266128 kmers +read 7700000 sequences, 1946976061 bases, 1715976091 kmers +read 7800000 sequences, 1952688102 bases, 1718688132 kmers +read 7900000 sequences, 1958401942 bases, 1721401972 kmers +read 8000000 sequences, 1964117224 bases, 1724117254 kmers +read 8100000 sequences, 1969830329 bases, 1726830359 kmers +read 8200000 sequences, 1975558497 bases, 1729558527 kmers +read 8300000 sequences, 1981283991 bases, 1732284021 kmers +read 8400000 sequences, 1986995233 bases, 1734995263 kmers +read 8500000 sequences, 1992721377 bases, 1737721407 kmers +read 8600000 sequences, 1998449484 bases, 1740449514 kmers +read 8700000 sequences, 2004184325 bases, 1743184355 kmers +read 8800000 sequences, 2009910285 bases, 1745910315 kmers +read 8900000 sequences, 2015628049 bases, 1748628079 kmers +read 9000000 sequences, 2021346657 bases, 1751346687 kmers +read 9100000 sequences, 2027081520 bases, 1754081550 kmers +read 9200000 sequences, 2032818008 bases, 1756818038 kmers +read 9300000 sequences, 2038553000 bases, 1759553030 kmers +read 9400000 
sequences, 2044303164 bases, 1762303194 kmers +read 9500000 sequences, 2050045456 bases, 1765045486 kmers +read 9600000 sequences, 2055782084 bases, 1767782114 kmers +read 9700000 sequences, 2061515459 bases, 1770515489 kmers +read 9800000 sequences, 2067264696 bases, 1773264726 kmers +read 9900000 sequences, 2073029586 bases, 1776029616 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.12.bin'... +read 10000000 sequences, 2078777071 bases, 1778777101 kmers +read 10100000 sequences, 2084516688 bases, 1781516718 kmers +read 10200000 sequences, 2090254180 bases, 1784254210 kmers +read 10300000 sequences, 2096010791 bases, 1787010821 kmers +read 10400000 sequences, 2101750068 bases, 1789750098 kmers +read 10500000 sequences, 2107490469 bases, 1792490499 kmers +read 10600000 sequences, 2113226774 bases, 1795226804 kmers +read 10700000 sequences, 2118983996 bases, 1797984026 kmers +read 10800000 sequences, 2124753124 bases, 1800753154 kmers +read 10900000 sequences, 2130500309 bases, 1803500339 kmers +read 11000000 sequences, 2136245797 bases, 1806245827 kmers +read 11100000 sequences, 2141998968 bases, 1808998998 kmers +read 11200000 sequences, 2147751449 bases, 1811751479 kmers +read 11300000 sequences, 2153511605 bases, 1814511635 kmers +read 11400000 sequences, 2159254615 bases, 1817254645 kmers +read 11500000 sequences, 2165018820 bases, 1820018850 kmers +read 11600000 sequences, 2170788091 bases, 1822788121 kmers +read 11700000 sequences, 2176561459 bases, 1825561489 kmers +read 11800000 sequences, 2182327393 bases, 1828327423 kmers +read 11900000 sequences, 2188093393 bases, 1831093423 kmers +read 12000000 sequences, 2193864455 bases, 1833864485 kmers +read 12100000 sequences, 2199639949 bases, 1836639979 kmers +read 12200000 sequences, 2205409553 bases, 1839409583 kmers +read 12300000 sequences, 2211196032 bases, 1842196062 kmers +read 12400000 sequences, 2216980660 bases, 1844980690 
kmers +read 12500000 sequences, 2222778653 bases, 1847778683 kmers +read 12600000 sequences, 2228548298 bases, 1850548328 kmers +read 12700000 sequences, 2234333686 bases, 1853333716 kmers +read 12800000 sequences, 2240119243 bases, 1856119273 kmers +read 12900000 sequences, 2245909832 bases, 1858909862 kmers +read 13000000 sequences, 2251713079 bases, 1861713109 kmers +read 13100000 sequences, 2257517156 bases, 1864517186 kmers +read 13200000 sequences, 2263299799 bases, 1867299829 kmers +read 13300000 sequences, 2269090355 bases, 1870090385 kmers +read 13400000 sequences, 2274889056 bases, 1872889086 kmers +read 13500000 sequences, 2280679358 bases, 1875679388 kmers +read 13600000 sequences, 2286496297 bases, 1878496327 kmers +read 13700000 sequences, 2292303406 bases, 1881303436 kmers +read 13800000 sequences, 2298095843 bases, 1884095873 kmers +read 13900000 sequences, 2303896230 bases, 1886896260 kmers +read 14000000 sequences, 2309685280 bases, 1889685310 kmers +read 14100000 sequences, 2315487789 bases, 1892487819 kmers +read 14200000 sequences, 2321299096 bases, 1895299126 kmers +read 14300000 sequences, 2327098692 bases, 1898098722 kmers +read 14400000 sequences, 2332930649 bases, 1900930679 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.13.bin'... 
+read 14500000 sequences, 2338742250 bases, 1903742280 kmers +read 14600000 sequences, 2344551767 bases, 1906551797 kmers +read 14700000 sequences, 2350372494 bases, 1909372524 kmers +read 14800000 sequences, 2356184423 bases, 1912184453 kmers +read 14900000 sequences, 2362012427 bases, 1915012457 kmers +read 15000000 sequences, 2367830829 bases, 1917830859 kmers +read 15100000 sequences, 2373646109 bases, 1920646139 kmers +read 15200000 sequences, 2379487578 bases, 1923487608 kmers +read 15300000 sequences, 2385319549 bases, 1926319579 kmers +read 15400000 sequences, 2391159959 bases, 1929159989 kmers +read 15500000 sequences, 2396996685 bases, 1931996715 kmers +read 15600000 sequences, 2402827442 bases, 1934827472 kmers +read 15700000 sequences, 2408655214 bases, 1937655244 kmers +read 15800000 sequences, 2414491211 bases, 1940491241 kmers +read 15900000 sequences, 2420340361 bases, 1943340391 kmers +read 16000000 sequences, 2426185046 bases, 1946185076 kmers +read 16100000 sequences, 2432032084 bases, 1949032114 kmers +read 16200000 sequences, 2437865309 bases, 1951865339 kmers +read 16300000 sequences, 2443712415 bases, 1954712445 kmers +read 16400000 sequences, 2449571035 bases, 1957571065 kmers +read 16500000 sequences, 2455436767 bases, 1960436797 kmers +read 16600000 sequences, 2461294139 bases, 1963294169 kmers +read 16700000 sequences, 2467151066 bases, 1966151096 kmers +read 16800000 sequences, 2473014518 bases, 1969014548 kmers +read 16900000 sequences, 2478879148 bases, 1971879178 kmers +read 17000000 sequences, 2484756311 bases, 1974756341 kmers +read 17100000 sequences, 2490639911 bases, 1977639941 kmers +read 17200000 sequences, 2496505090 bases, 1980505120 kmers +read 17300000 sequences, 2502376237 bases, 1983376267 kmers +read 17400000 sequences, 2508235866 bases, 1986235896 kmers +read 17500000 sequences, 2514118929 bases, 1989118959 kmers +read 17600000 sequences, 2520003821 bases, 1992003851 kmers +read 17700000 sequences, 2525879572 bases, 
1994879602 kmers +read 17800000 sequences, 2531769727 bases, 1997769757 kmers +read 17900000 sequences, 2537665456 bases, 2000665486 kmers +read 18000000 sequences, 2543560738 bases, 2003560768 kmers +read 18100000 sequences, 2549454266 bases, 2006454296 kmers +read 18200000 sequences, 2555337271 bases, 2009337301 kmers +read 18300000 sequences, 2561224774 bases, 2012224804 kmers +read 18400000 sequences, 2567143057 bases, 2015143087 kmers +read 18500000 sequences, 2573036123 bases, 2018036153 kmers +read 18600000 sequences, 2578923948 bases, 2020923978 kmers +read 18700000 sequences, 2584830711 bases, 2023830741 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.14.bin'... +read 18800000 sequences, 2590732809 bases, 2026732839 kmers +read 18900000 sequences, 2596629138 bases, 2029629168 kmers +read 19000000 sequences, 2602544769 bases, 2032544799 kmers +read 19100000 sequences, 2608476616 bases, 2035476646 kmers +read 19200000 sequences, 2614389831 bases, 2038389861 kmers +read 19300000 sequences, 2620312306 bases, 2041312336 kmers +read 19400000 sequences, 2626244668 bases, 2044244698 kmers +read 19500000 sequences, 2632165840 bases, 2047165870 kmers +read 19600000 sequences, 2638096284 bases, 2050096314 kmers +read 19700000 sequences, 2644022948 bases, 2053022978 kmers +read 19800000 sequences, 2649949779 bases, 2055949809 kmers +read 19900000 sequences, 2655887655 bases, 2058887685 kmers +read 20000000 sequences, 2661829270 bases, 2061829300 kmers +read 20100000 sequences, 2667773885 bases, 2064773915 kmers +read 20200000 sequences, 2673731680 bases, 2067731710 kmers +read 20300000 sequences, 2679668638 bases, 2070668668 kmers +read 20400000 sequences, 2685621410 bases, 2073621440 kmers +read 20500000 sequences, 2691583280 bases, 2076583310 kmers +read 20600000 sequences, 2697548031 bases, 2079548061 kmers +read 20700000 sequences, 2703523021 bases, 2082523051 kmers +read 20800000 
sequences, 2709482823 bases, 2085482853 kmers +read 20900000 sequences, 2715437358 bases, 2088437388 kmers +read 21000000 sequences, 2721408412 bases, 2091408442 kmers +read 21100000 sequences, 2727378283 bases, 2094378313 kmers +read 21200000 sequences, 2733356793 bases, 2097356823 kmers +read 21300000 sequences, 2739324030 bases, 2100324060 kmers +read 21400000 sequences, 2745310949 bases, 2103310979 kmers +read 21500000 sequences, 2751305483 bases, 2106305513 kmers +read 21600000 sequences, 2757279970 bases, 2109280000 kmers +read 21700000 sequences, 2763250342 bases, 2112250372 kmers +read 21800000 sequences, 2769246031 bases, 2115246061 kmers +read 21900000 sequences, 2775228369 bases, 2118228399 kmers +read 22000000 sequences, 2781228805 bases, 2121228835 kmers +read 22100000 sequences, 2787227943 bases, 2124227973 kmers +read 22200000 sequences, 2793232303 bases, 2127232333 kmers +read 22300000 sequences, 2799254476 bases, 2130254506 kmers +read 22400000 sequences, 2805268478 bases, 2133268508 kmers +read 22500000 sequences, 2811273807 bases, 2136273837 kmers +read 22600000 sequences, 2817297446 bases, 2139297476 kmers +read 22700000 sequences, 2823311839 bases, 2142311869 kmers +read 22800000 sequences, 2829348673 bases, 2145348703 kmers +read 22900000 sequences, 2835387522 bases, 2148387552 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.15.bin'... 
+read 23000000 sequences, 2841415061 bases, 2151415091 kmers +read 23100000 sequences, 2847447355 bases, 2154447385 kmers +read 23200000 sequences, 2853464241 bases, 2157464271 kmers +read 23300000 sequences, 2859504344 bases, 2160504374 kmers +read 23400000 sequences, 2865558640 bases, 2163558670 kmers +read 23500000 sequences, 2871616623 bases, 2166616653 kmers +read 23600000 sequences, 2877666220 bases, 2169666250 kmers +read 23700000 sequences, 2883708997 bases, 2172709027 kmers +read 23800000 sequences, 2889780064 bases, 2175780094 kmers +read 23900000 sequences, 2895851763 bases, 2178851793 kmers +read 24000000 sequences, 2901936339 bases, 2181936369 kmers +read 24100000 sequences, 2908006001 bases, 2185006031 kmers +read 24200000 sequences, 2914097489 bases, 2188097519 kmers +read 24300000 sequences, 2920158525 bases, 2191158555 kmers +read 24400000 sequences, 2926232288 bases, 2194232318 kmers +read 24500000 sequences, 2932310359 bases, 2197310389 kmers +read 24600000 sequences, 2938383670 bases, 2200383700 kmers +read 24700000 sequences, 2944455547 bases, 2203455577 kmers +read 24800000 sequences, 2950553509 bases, 2206553539 kmers +read 24900000 sequences, 2956625157 bases, 2209625187 kmers +read 25000000 sequences, 2962750710 bases, 2212750740 kmers +read 25100000 sequences, 2968838441 bases, 2215838471 kmers +read 25200000 sequences, 2974964577 bases, 2218964607 kmers +read 25300000 sequences, 2981066348 bases, 2222066378 kmers +read 25400000 sequences, 2987174670 bases, 2225174700 kmers +read 25500000 sequences, 2993287441 bases, 2228287471 kmers +read 25600000 sequences, 2999404181 bases, 2231404211 kmers +read 25700000 sequences, 3005525939 bases, 2234525969 kmers +read 25800000 sequences, 3011641818 bases, 2237641848 kmers +read 25900000 sequences, 3017762680 bases, 2240762710 kmers +read 26000000 sequences, 3023914368 bases, 2243914398 kmers +read 26100000 sequences, 3030074467 bases, 2247074497 kmers +read 26200000 sequences, 3036215358 bases, 
2250215388 kmers +read 26300000 sequences, 3042374173 bases, 2253374203 kmers +read 26400000 sequences, 3048520171 bases, 2256520201 kmers +read 26500000 sequences, 3054682791 bases, 2259682821 kmers +read 26600000 sequences, 3060872341 bases, 2262872371 kmers +read 26700000 sequences, 3067031340 bases, 2266031370 kmers +read 26800000 sequences, 3073202220 bases, 2269202250 kmers +read 26900000 sequences, 3079363082 bases, 2272363112 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.16.bin'... +read 27000000 sequences, 3085556027 bases, 2275556057 kmers +read 27100000 sequences, 3091751521 bases, 2278751551 kmers +read 27200000 sequences, 3097952572 bases, 2281952602 kmers +read 27300000 sequences, 3104145541 bases, 2285145571 kmers +read 27400000 sequences, 3110358894 bases, 2288358924 kmers +read 27500000 sequences, 3116536976 bases, 2291537006 kmers +read 27600000 sequences, 3122708679 bases, 2294708709 kmers +read 27700000 sequences, 3128900397 bases, 2297900427 kmers +read 27800000 sequences, 3135111542 bases, 2301111572 kmers +read 27900000 sequences, 3141317211 bases, 2304317241 kmers +read 28000000 sequences, 3147523754 bases, 2307523784 kmers +read 28100000 sequences, 3153730407 bases, 2310730437 kmers +read 28200000 sequences, 3159957684 bases, 2313957714 kmers +read 28300000 sequences, 3166181142 bases, 2317181172 kmers +read 28400000 sequences, 3172412131 bases, 2320412161 kmers +read 28500000 sequences, 3178654828 bases, 2323654858 kmers +read 28600000 sequences, 3184886524 bases, 2326886554 kmers +read 28700000 sequences, 3191117068 bases, 2330117098 kmers +read 28800000 sequences, 3197379540 bases, 2333379570 kmers +read 28900000 sequences, 3203639436 bases, 2336639466 kmers +read 29000000 sequences, 3209891656 bases, 2339891686 kmers +read 29100000 sequences, 3216141216 bases, 2343141246 kmers +read 29200000 sequences, 3222413290 bases, 2346413320 kmers +read 29300000 
sequences, 3228708327 bases, 2349708357 kmers +read 29400000 sequences, 3234965760 bases, 2352965790 kmers +read 29500000 sequences, 3241275202 bases, 2356275232 kmers +read 29600000 sequences, 3247560688 bases, 2359560718 kmers +read 29700000 sequences, 3253851429 bases, 2362851459 kmers +read 29800000 sequences, 3260147813 bases, 2366147843 kmers +read 29900000 sequences, 3266443188 bases, 2369443218 kmers +read 30000000 sequences, 3272761129 bases, 2372761159 kmers +read 30100000 sequences, 3279084870 bases, 2376084900 kmers +read 30200000 sequences, 3285396291 bases, 2379396321 kmers +read 30300000 sequences, 3291706637 bases, 2382706667 kmers +read 30400000 sequences, 3298020747 bases, 2386020777 kmers +read 30500000 sequences, 3304364961 bases, 2389364991 kmers +read 30600000 sequences, 3310727391 bases, 2392727421 kmers +read 30700000 sequences, 3317071594 bases, 2396071624 kmers +read 30800000 sequences, 3323415712 bases, 2399415742 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.17.bin'... 
+read 30900000 sequences, 3329790954 bases, 2402790984 kmers +read 31000000 sequences, 3336150928 bases, 2406150958 kmers +read 31100000 sequences, 3342541624 bases, 2409541654 kmers +read 31200000 sequences, 3348907387 bases, 2412907417 kmers +read 31300000 sequences, 3355301252 bases, 2416301282 kmers +read 31400000 sequences, 3361677930 bases, 2419677960 kmers +read 31500000 sequences, 3368088580 bases, 2423088610 kmers +read 31600000 sequences, 3374497373 bases, 2426497403 kmers +read 31700000 sequences, 3380954973 bases, 2429955003 kmers +read 31800000 sequences, 3387385456 bases, 2433385486 kmers +read 31900000 sequences, 3393821566 bases, 2436821596 kmers +read 32000000 sequences, 3400254669 bases, 2440254699 kmers +read 32100000 sequences, 3406661433 bases, 2443661463 kmers +read 32200000 sequences, 3413130559 bases, 2447130589 kmers +read 32300000 sequences, 3419570382 bases, 2450570412 kmers +read 32400000 sequences, 3426014430 bases, 2454014460 kmers +read 32500000 sequences, 3432484446 bases, 2457484476 kmers +read 32600000 sequences, 3438957018 bases, 2460957048 kmers +read 32700000 sequences, 3445449720 bases, 2464449750 kmers +read 32800000 sequences, 3451918191 bases, 2467918221 kmers +read 32900000 sequences, 3458402208 bases, 2471402238 kmers +read 33000000 sequences, 3464886745 bases, 2474886775 kmers +read 33100000 sequences, 3471383068 bases, 2478383098 kmers +read 33200000 sequences, 3477878747 bases, 2481878777 kmers +read 33300000 sequences, 3484417152 bases, 2485417182 kmers +read 33400000 sequences, 3490941859 bases, 2488941889 kmers +read 33500000 sequences, 3497445803 bases, 2492445833 kmers +read 33600000 sequences, 3503981393 bases, 2495981423 kmers +read 33700000 sequences, 3510547286 bases, 2499547316 kmers +read 33800000 sequences, 3517100145 bases, 2503100175 kmers +read 33900000 sequences, 3523655251 bases, 2506655281 kmers +read 34000000 sequences, 3530247130 bases, 2510247160 kmers +read 34100000 sequences, 3536826114 bases, 
2513826144 kmers +read 34200000 sequences, 3543370719 bases, 2517370749 kmers +read 34300000 sequences, 3549941926 bases, 2520941956 kmers +read 34400000 sequences, 3556525084 bases, 2524525114 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.18.bin'... +read 34500000 sequences, 3563130081 bases, 2528130111 kmers +read 34600000 sequences, 3569739780 bases, 2531739810 kmers +read 34700000 sequences, 3576369309 bases, 2535369339 kmers +read 34800000 sequences, 3582984603 bases, 2538984633 kmers +read 34900000 sequences, 3589639338 bases, 2542639368 kmers +read 35000000 sequences, 3596273782 bases, 2546273812 kmers +read 35100000 sequences, 3602915624 bases, 2549915654 kmers +read 35200000 sequences, 3609531465 bases, 2553531495 kmers +read 35300000 sequences, 3616196895 bases, 2557196925 kmers +read 35400000 sequences, 3622877377 bases, 2560877407 kmers +read 35500000 sequences, 3629566990 bases, 2564567020 kmers +read 35600000 sequences, 3636241308 bases, 2568241338 kmers +read 35700000 sequences, 3642914924 bases, 2571914954 kmers +read 35800000 sequences, 3649603351 bases, 2575603381 kmers +read 35900000 sequences, 3656318441 bases, 2579318471 kmers +read 36000000 sequences, 3663044755 bases, 2583044785 kmers +read 36100000 sequences, 3669777945 bases, 2586777975 kmers +read 36200000 sequences, 3676509798 bases, 2590509828 kmers +read 36300000 sequences, 3683277205 bases, 2594277235 kmers +read 36400000 sequences, 3690023259 bases, 2598023289 kmers +read 36500000 sequences, 3696780780 bases, 2601780810 kmers +read 36600000 sequences, 3703553916 bases, 2605553946 kmers +read 36700000 sequences, 3710337571 bases, 2609337601 kmers +read 36800000 sequences, 3717123025 bases, 2613123055 kmers +read 36900000 sequences, 3723910267 bases, 2616910297 kmers +read 37000000 sequences, 3730743453 bases, 2620743483 kmers +read 37100000 sequences, 3737567873 bases, 2624567903 kmers +read 37200000 
sequences, 3744378273 bases, 2628378303 kmers +read 37300000 sequences, 3751211086 bases, 2632211116 kmers +read 37400000 sequences, 3758073143 bases, 2636073173 kmers +read 37500000 sequences, 3764943116 bases, 2639943146 kmers +read 37600000 sequences, 3771815746 bases, 2643815776 kmers +read 37700000 sequences, 3778661925 bases, 2647661955 kmers +read 37800000 sequences, 3785547694 bases, 2651547724 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.19.bin'... +read 37900000 sequences, 3792403153 bases, 2655403183 kmers +read 38000000 sequences, 3799297812 bases, 2659297842 kmers +read 38100000 sequences, 3806239974 bases, 2663240004 kmers +read 38200000 sequences, 3813157585 bases, 2667157615 kmers +read 38300000 sequences, 3820098418 bases, 2671098448 kmers +read 38400000 sequences, 3827045678 bases, 2675045708 kmers +read 38500000 sequences, 3834035768 bases, 2679035798 kmers +read 38600000 sequences, 3841003319 bases, 2683003349 kmers +read 38700000 sequences, 3848003705 bases, 2687003735 kmers +read 38800000 sequences, 3854998310 bases, 2690998340 kmers +read 38900000 sequences, 3861999344 bases, 2694999374 kmers +read 39000000 sequences, 3869022053 bases, 2699022083 kmers +read 39100000 sequences, 3876075267 bases, 2703075297 kmers +read 39200000 sequences, 3883150321 bases, 2707150351 kmers +read 39300000 sequences, 3890222625 bases, 2711222655 kmers +read 39400000 sequences, 3897268484 bases, 2715268514 kmers +read 39500000 sequences, 3904370112 bases, 2719370142 kmers +read 39600000 sequences, 3911448030 bases, 2723448060 kmers +read 39700000 sequences, 3918568345 bases, 2727568375 kmers +read 39800000 sequences, 3925645444 bases, 2731645474 kmers +read 39900000 sequences, 3932749408 bases, 2735749438 kmers +read 40000000 sequences, 3939899844 bases, 2739899874 kmers +read 40100000 sequences, 3947016310 bases, 2744016340 kmers +read 40200000 sequences, 3954176373 bases, 
2748176403 kmers +read 40300000 sequences, 3961389382 bases, 2752389412 kmers +read 40400000 sequences, 3968552071 bases, 2756552101 kmers +read 40500000 sequences, 3975752223 bases, 2760752253 kmers +read 40600000 sequences, 3982970739 bases, 2764970769 kmers +read 40700000 sequences, 3990152709 bases, 2769152739 kmers +read 40800000 sequences, 3997405356 bases, 2773405386 kmers +read 40900000 sequences, 4004672388 bases, 2777672418 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.20.bin'... +read 41000000 sequences, 4011944292 bases, 2781944322 kmers +read 41100000 sequences, 4019238344 bases, 2786238374 kmers +read 41200000 sequences, 4026519143 bases, 2790519173 kmers +read 41300000 sequences, 4033822566 bases, 2794822596 kmers +read 41400000 sequences, 4041178251 bases, 2799178281 kmers +read 41500000 sequences, 4048493790 bases, 2803493820 kmers +read 41600000 sequences, 4055829038 bases, 2807829068 kmers +read 41700000 sequences, 4063212559 bases, 2812212589 kmers +read 41800000 sequences, 4070629890 bases, 2816629920 kmers +read 41900000 sequences, 4078028063 bases, 2821028093 kmers +read 42000000 sequences, 4085447723 bases, 2825447753 kmers +read 42100000 sequences, 4092897915 bases, 2829897945 kmers +read 42200000 sequences, 4100371858 bases, 2834371888 kmers +read 42300000 sequences, 4107845174 bases, 2838845204 kmers +read 42400000 sequences, 4115310509 bases, 2843310539 kmers +read 42500000 sequences, 4122794181 bases, 2847794211 kmers +read 42600000 sequences, 4130336752 bases, 2852336782 kmers +read 42700000 sequences, 4137838898 bases, 2856838928 kmers +read 42800000 sequences, 4145439502 bases, 2861439532 kmers +read 42900000 sequences, 4153050260 bases, 2866050290 kmers +read 43000000 sequences, 4160667131 bases, 2870667161 kmers +read 43100000 sequences, 4168281208 bases, 2875281238 kmers +read 43200000 sequences, 4175884381 bases, 2879884411 kmers +read 43300000 
sequences, 4183510403 bases, 2884510433 kmers +read 43400000 sequences, 4191153977 bases, 2889154007 kmers +read 43500000 sequences, 4198862981 bases, 2893863011 kmers +read 43600000 sequences, 4206635916 bases, 2898635946 kmers +read 43700000 sequences, 4214404635 bases, 2903404665 kmers +read 43800000 sequences, 4222136044 bases, 2908136074 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.21.bin'... +read 43900000 sequences, 4229915489 bases, 2912915519 kmers +read 44000000 sequences, 4237696439 bases, 2917696469 kmers +read 44100000 sequences, 4245541347 bases, 2922541377 kmers +read 44200000 sequences, 4253343569 bases, 2927343599 kmers +read 44300000 sequences, 4261209160 bases, 2932209190 kmers +read 44400000 sequences, 4269076502 bases, 2937076532 kmers +read 44500000 sequences, 4276951507 bases, 2941951537 kmers +read 44600000 sequences, 4284863476 bases, 2946863506 kmers +read 44700000 sequences, 4292779395 bases, 2951779425 kmers +read 44800000 sequences, 4300761505 bases, 2956761535 kmers +read 44900000 sequences, 4308749134 bases, 2961749164 kmers +read 45000000 sequences, 4316730477 bases, 2966730507 kmers +read 45100000 sequences, 4324743898 bases, 2971743928 kmers +read 45200000 sequences, 4332706339 bases, 2976706369 kmers +read 45300000 sequences, 4340799702 bases, 2981799732 kmers +read 45400000 sequences, 4348938044 bases, 2986938074 kmers +read 45500000 sequences, 4357089148 bases, 2992089178 kmers +read 45600000 sequences, 4365213123 bases, 2997213153 kmers +read 45700000 sequences, 4373409255 bases, 3002409285 kmers +read 45800000 sequences, 4381555955 bases, 3007555985 kmers +read 45900000 sequences, 4389760124 bases, 3012760154 kmers +read 46000000 sequences, 4398064658 bases, 3018064688 kmers +read 46100000 sequences, 4406387022 bases, 3023387052 kmers +read 46200000 sequences, 4414694547 bases, 3028694577 kmers +read 46300000 sequences, 4423058645 bases, 
3034058675 kmers +read 46400000 sequences, 4431425456 bases, 3039425486 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.22.bin'... +read 46500000 sequences, 4439833395 bases, 3044833425 kmers +read 46600000 sequences, 4448259061 bases, 3050259091 kmers +read 46700000 sequences, 4456674986 bases, 3055675016 kmers +read 46800000 sequences, 4465159451 bases, 3061159481 kmers +read 46900000 sequences, 4473635424 bases, 3066635454 kmers +read 47000000 sequences, 4482251403 bases, 3072251433 kmers +read 47100000 sequences, 4490782771 bases, 3077782801 kmers +read 47200000 sequences, 4499401922 bases, 3083401952 kmers +read 47300000 sequences, 4508109132 bases, 3089109162 kmers +read 47400000 sequences, 4516742321 bases, 3094742351 kmers +read 47500000 sequences, 4525469588 bases, 3100469618 kmers +read 47600000 sequences, 4534242679 bases, 3106242709 kmers +read 47700000 sequences, 4542982876 bases, 3111982906 kmers +read 47800000 sequences, 4551866465 bases, 3117866495 kmers +read 47900000 sequences, 4560699422 bases, 3123699452 kmers +read 48000000 sequences, 4569570230 bases, 3129570260 kmers +read 48100000 sequences, 4578501122 bases, 3135501152 kmers +read 48200000 sequences, 4587457020 bases, 3141457050 kmers +read 48300000 sequences, 4596477426 bases, 3147477456 kmers +read 48400000 sequences, 4605472867 bases, 3153472897 kmers +read 48500000 sequences, 4614570314 bases, 3159570344 kmers +read 48600000 sequences, 4623645571 bases, 3165645601 kmers +read 48700000 sequences, 4632844313 bases, 3171844343 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.23.bin'... 
+read 48800000 sequences, 4642092013 bases, 3178092043 kmers +read 48900000 sequences, 4651346054 bases, 3184346084 kmers +read 49000000 sequences, 4660631564 bases, 3190631594 kmers +read 49100000 sequences, 4670019434 bases, 3197019464 kmers +read 49200000 sequences, 4679397122 bases, 3203397152 kmers +read 49300000 sequences, 4688892126 bases, 3209892156 kmers +read 49400000 sequences, 4698398402 bases, 3216398432 kmers +read 49500000 sequences, 4707888048 bases, 3222888078 kmers +read 49600000 sequences, 4717466104 bases, 3229466134 kmers +read 49700000 sequences, 4727070074 bases, 3236070104 kmers +read 49800000 sequences, 4736756111 bases, 3242756141 kmers +read 49900000 sequences, 4746452257 bases, 3249452287 kmers +read 50000000 sequences, 4756246274 bases, 3256246304 kmers +read 50100000 sequences, 4766057201 bases, 3263057231 kmers +read 50200000 sequences, 4775878485 bases, 3269878515 kmers +read 50300000 sequences, 4785727317 bases, 3276727347 kmers +read 50400000 sequences, 4795653806 bases, 3283653836 kmers +read 50500000 sequences, 4805712567 bases, 3290712597 kmers +read 50600000 sequences, 4815738486 bases, 3297738516 kmers +read 50700000 sequences, 4825926061 bases, 3304926091 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.24.bin'... 
+read 50800000 sequences, 4836224392 bases, 3312224422 kmers +read 50900000 sequences, 4846451475 bases, 3319451505 kmers +read 51000000 sequences, 4856753402 bases, 3326753432 kmers +read 51100000 sequences, 4867266326 bases, 3334266356 kmers +read 51200000 sequences, 4877861799 bases, 3341861829 kmers +read 51300000 sequences, 4888414798 bases, 3349414828 kmers +read 51400000 sequences, 4899113746 bases, 3357113776 kmers +read 51500000 sequences, 4909857519 bases, 3364857549 kmers +read 51600000 sequences, 4920671018 bases, 3372671048 kmers +read 51700000 sequences, 4931456761 bases, 3380456791 kmers +read 51800000 sequences, 4942313146 bases, 3388313176 kmers +read 51900000 sequences, 4953292346 bases, 3396292376 kmers +read 52000000 sequences, 4964398640 bases, 3404398670 kmers +read 52100000 sequences, 4975538457 bases, 3412538487 kmers +read 52200000 sequences, 4986759889 bases, 3420759919 kmers +read 52300000 sequences, 4998126697 bases, 3429126727 kmers +read 52400000 sequences, 5009563077 bases, 3437563107 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.25.bin'... 
+read 52500000 sequences, 5021022583 bases, 3446022613 kmers +read 52600000 sequences, 5032535927 bases, 3454535957 kmers +read 52700000 sequences, 5044218660 bases, 3463218690 kmers +read 52800000 sequences, 5055973619 bases, 3471973649 kmers +read 52900000 sequences, 5067872804 bases, 3480872834 kmers +read 53000000 sequences, 5079791399 bases, 3489791429 kmers +read 53100000 sequences, 5091783497 bases, 3498783527 kmers +read 53200000 sequences, 5103925550 bases, 3507925580 kmers +read 53300000 sequences, 5116240983 bases, 3517241013 kmers +read 53400000 sequences, 5128584923 bases, 3526584953 kmers +read 53500000 sequences, 5140962917 bases, 3535962947 kmers +read 53600000 sequences, 5153500478 bases, 3545500508 kmers +read 53700000 sequences, 5166148888 bases, 3555148918 kmers +read 53800000 sequences, 5178959197 bases, 3564959227 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.26.bin'... +read 53900000 sequences, 5191971403 bases, 3574971433 kmers +read 54000000 sequences, 5205070227 bases, 3585070257 kmers +read 54100000 sequences, 5218297285 bases, 3595297315 kmers +read 54200000 sequences, 5231683093 bases, 3605683123 kmers +read 54300000 sequences, 5245150344 bases, 3616150374 kmers +read 54400000 sequences, 5258739173 bases, 3626739203 kmers +read 54500000 sequences, 5272405047 bases, 3637405077 kmers +read 54600000 sequences, 5286362231 bases, 3648362261 kmers +read 54700000 sequences, 5300436701 bases, 3659436731 kmers +read 54800000 sequences, 5314343049 bases, 3670343079 kmers +read 54900000 sequences, 5328793168 bases, 3681793198 kmers +read 55000000 sequences, 5343494365 bases, 3693494395 kmers +read 55100000 sequences, 5358294825 bases, 3705294855 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.27.bin'... +read 55200000 sequences, 5373204958 bases, 3717204988 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279403255191108.minimizers.28.bin'... +read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +num_super_kmers 825700183 +num_pieces 55207754 (+0.890898 [bits/kmer]) +=== step 1: 'parse_file' 266.485 [sec] (71.672 [ns/kmer]) + == files to merge = 29 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 450000000 +num_written_tuples = 500000000 +num_written_tuples = 550000000 +num_written_tuples = 600000000 +num_written_tuples = 650000000 +num_written_tuples = 700000000 +num_written_tuples = 750000000 +num_written_tuples = 800000000 +num_written_tuples = 825700183 +num_minimizers 624010762 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 65.3272 [sec] (17.5699 [ns/kmer]) +bits_per_offset = ceil(log2(5374353570)) = 33 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279735999139401.bucket_pairs.0.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279735999139401.bucket_pairs.1.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279735999139401.bucket_pairs.2.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279735999139401.bucket_pairs.3.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279735999139401.bucket_pairs.4.bin'... 
+num_singletons 521231883/624010762 (83.5293%) + == files to merge = 5 +num_written_pairs = 50000000 +num_written_pairs = 100000000 +num_written_pairs = 102778879 +=== step 3: 'build_index' 216.456 [sec] (58.2165 [ns/kmer]) +max_num_super_kmers_in_bucket 95844 +log2_max_num_super_kmers_in_bucket 17 +num_buckets_in_skew_index 156279/624010762 (0.0250443%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 35996374 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 26993825 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 19373179 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 13381657 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 8418352 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 5915012 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 95844: 7265419 +num_kmers_in_skew_index 117343818 (3.156%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 35996374 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 35996374 keys; bits/key = 2.79838 + built positions[0] for 35996374 keys; bits/key = 7.00001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 26993825 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 26993825 keys; bits/key = 2.78053 + built positions[1] for 26993825 keys; bits/key = 8.00001 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 19373179 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 19373179 keys; bits/key = 2.86228 + built positions[2] for 19373179 keys; bits/key = 9.00002 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 13381657 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[3] for 13381657 keys; bits/key = 2.9839 + built positions[3] for 13381657 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 8418352 + building MPHF with 64 threads and 256 partitions... + built mphs[4] for 8418352 keys; bits/key = 3.02475 + built positions[4] for 8418352 keys; bits/key = 11 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 5915012 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 5915012 keys; bits/key = 3.15304 + built positions[5] for 5915012 keys; bits/key = 12.0001 + lower 4096; upper 95844; num_bits_per_pos 17; keys_in_partition.size() 7265419 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 7265419 keys; bits/key = 3.07772 + built positions[6] for 7265419 keys; bits/key = 17 +num_bits_for_skew_index 1400841344(0.376761 [bits/kmer]) +=== step 4: 'build_skew_index' 18.1439 [sec] (4.87987 [ns/kmer]) +=== total_time 566.412 [sec] (152.338 [ns/kmer]) +total index size: 5345319756 [B] -- 5345.32 [MB] +SPACE BREAKDOWN: + minimizers: 0.440906 [bits/kmer] (2.62711 [bits/key]) -- 3.8336% + pieces: 0.14758 [bits/kmer] -- 1.28318% + num_super_kmers_before_bucket: 0.316514 [bits/kmer] -- 2.75203% + offsets: 7.32846 [bits/kmer] -- 63.7195% + strings: 2.8909 [bits/kmer] -- 25.1358% + skew_index: 0.376761 [bits/kmer] -- 3.27586% + weights: 3.95899e-07 [bits/kmer] -- 3.44226e-06% + weight_interval_values: 6.8852e-08 [bits/kmer] + weight_interval_lengths: 2.58195e-07 [bits/kmer] + weight_dictionary: 6.8852e-08 [bits/kmer] + -------------- + total: 11.5011 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 83.5293% +buckets with 2 super_kmers = 12.8251% +buckets with 3 super_kmers = 2.10141% +buckets with 4 super_kmers = 0.585352% +buckets with 5 super_kmers = 0.255767% +buckets with 6 super_kmers = 0.149049% +buckets with 7 super_kmers = 0.0987449% +buckets with 8 super_kmers = 0.0706794% +buckets with 9 
super_kmers = 0.0531121% +buckets with 10 super_kmers = 0.0414318% +buckets with 11 super_kmers = 0.0328145% +buckets with 12 super_kmers = 0.0267737% +buckets with 13 super_kmers = 0.0223161% +buckets with 14 super_kmers = 0.0187769% +buckets with 15 super_kmers = 0.0158791% +buckets with 16 super_kmers = 0.0137246% +max_num_super_kmers_in_bucket 95844 +2025-11-27 22:46:11: saving data structure to disk... +2025-11-27 22:46:13: DONE diff --git a/benchmarks/results-27-11-25-v3/k31/canon-build.time.log b/benchmarks/results-27-11-25-v3/k31/canon-build.time.log new file mode 100644 index 0000000..3f6adb8 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/canon-build.time.log @@ -0,0 +1,138 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/cod.k31.canon.sshash --canonical-parsing" + User time (seconds): 77.68 + System time (seconds): 6.18 + Percent of CPU this job got: 136% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:01.24 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3894516 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1483485 + Voluntary context switches: 455 + Involuntary context switches: 674 + Swaps: 0 + File system inputs: 0 + File system outputs: 10960280 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/kestrel.k31.canon.sshash --canonical-parsing" + User time (seconds): 191.92 + System time (seconds): 13.81 + 
Percent of CPU this job got: 132% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:34.82 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 8512312 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 3078706 + Voluntary context switches: 423 + Involuntary context switches: 1385 + Swaps: 0 + File system inputs: 0 + File system outputs: 25283168 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/human.k31.canon.sshash --canonical-parsing" + User time (seconds): 534.98 + System time (seconds): 31.40 + Percent of CPU this job got: 156% + Elapsed (wall clock) time (h:mm:ss or m:ss): 6:02.78 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 15455788 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 6071604 + Voluntary context switches: 962 + Involuntary context switches: 3836 + Swaps: 0 + File system inputs: 0 + File system outputs: 59862192 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/ncbi-virus.k31.canon.sshash --canonical-parsing" + User time (seconds): 52.13 + System time (seconds): 4.61 + Percent of CPU this 
job got: 127% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:44.38 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 2789936 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1118194 + Voluntary context switches: 244 + Involuntary context switches: 476 + Swaps: 0 + File system inputs: 0 + File system outputs: 7798200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/se.k31.canon.sshash --canonical-parsing" + User time (seconds): 151.97 + System time (seconds): 11.62 + Percent of CPU this job got: 135% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:00.37 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7380696 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2631466 + Voluntary context switches: 434 + Involuntary context switches: 1176 + Swaps: 0 + File system inputs: 0 + File system outputs: 22052600 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/hprc.k31.canon.sshash --canonical-parsing" + User time (seconds): 959.65 + System time (seconds): 48.93 + Percent of CPU this job got: 176% + Elapsed (wall clock) 
time (h:mm:ss or m:ss): 9:30.84 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 21183980 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 10595840 + Voluntary context switches: 922 + Involuntary context switches: 6040 + Swaps: 0 + File system inputs: 0 + File system outputs: 92217392 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-27-11-25-v3/k31/canon-streaming-queries-high-hit.log b/benchmarks/results-27-11-25-v3/k31/canon-streaming-queries-high-hit.log new file mode 100644 index 0000000..4291c99 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/canon-streaming-queries-high-hit.log @@ -0,0 +1,48 @@ +2025-11-28 15:41:55: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-28 15:42:10: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 132860997 (81.3664%) +num_searches = 23768683/132860997 (17.8899%) +num_extensions = 109092314/132860997 (82.1101%) +elapsed = 14474.1 millisec / 14.4741 sec / 0.241235 min / 88.642 ns/kmer +2025-11-28 15:42:10: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-28 15:44:10: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 525542891 (75.5375%) +num_searches = 83672136/525542891 (15.9211%) +num_extensions = 441870755/525542891 (84.0789%) +elapsed = 119068 millisec / 119.068 sec / 1.98447 min / 171.14 ns/kmer +2025-11-28 15:44:11: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-28 15:51:23: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1437949378 (91.5906%) +num_searches = 298261222/1437949378 (20.7421%) +num_extensions = 1139688156/1437949378 (79.2579%) +elapsed = 432143 millisec / 432.143 sec / 7.20239 min / 275.255 ns/kmer +2025-11-28 15:51:23: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... +2025-11-28 15:51:25: DONE +==== query report: +num_kmers = 14092875 +num_positive_kmers = 13983775 (99.2258%) +num_searches = 2252811/13983775 (16.1102%) +num_extensions = 11730964/13983775 (83.8898%) +elapsed = 1637.58 millisec / 1.63758 sec / 0.027293 min / 116.199 ns/kmer +2025-11-28 15:51:25: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-28 15:57:32: DONE +==== query report: +num_kmers = 789838196 +num_positive_kmers = 764882549 (96.8404%) +num_searches = 288913261/764882549 (37.7722%) +num_extensions = 475969288/764882549 (62.2278%) +elapsed = 366794 millisec / 366.794 sec / 6.11324 min / 464.392 ns/kmer +2025-11-28 15:57:34: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... 
+2025-11-28 16:06:20: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1485223278 (94.6017%) +num_searches = 332686229/1485223278 (22.3997%) +num_extensions = 1152537049/1485223278 (77.6003%) +elapsed = 525032 millisec / 525.032 sec / 8.75054 min / 334.421 ns/kmer diff --git a/benchmarks/results-27-11-25-v3/k31/regular-bench.log b/benchmarks/results-27-11-25-v3/k31/regular-bench.log new file mode 100644 index 0000000..d00b045 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/regular-bench.log @@ -0,0 +1,108 @@ +avg_nanosec_per_positive_lookup 978.981 +avg_nanosec_per_negative_lookup 1132.96 +avg_nanosec_per_positive_lookup_advanced 960.619 +avg_nanosec_per_negative_lookup_advanced 1140.81 +avg_nanosec_per_access 292.259 +iterator: avg_nanosec_per_kmer 13.7756 +avg_nanosec_per_positive_lookup 986.988 +avg_nanosec_per_negative_lookup 1142.34 +avg_nanosec_per_positive_lookup_advanced 958.331 +avg_nanosec_per_negative_lookup_advanced 1131.72 +avg_nanosec_per_access 292.949 +iterator: avg_nanosec_per_kmer 13.7967 +avg_nanosec_per_positive_lookup 968.186 +avg_nanosec_per_negative_lookup 1149.01 +avg_nanosec_per_positive_lookup_advanced 971.836 +avg_nanosec_per_negative_lookup_advanced 1133.44 +avg_nanosec_per_access 296.964 +iterator: avg_nanosec_per_kmer 13.8817 +avg_nanosec_per_positive_lookup 1022.64 +avg_nanosec_per_negative_lookup 1293.86 +avg_nanosec_per_positive_lookup_advanced 1008.02 +avg_nanosec_per_negative_lookup_advanced 1291.52 +avg_nanosec_per_access 263.68 +iterator: avg_nanosec_per_kmer 13.5254 +avg_nanosec_per_positive_lookup 995.541 +avg_nanosec_per_negative_lookup 1284.3 +avg_nanosec_per_positive_lookup_advanced 991.168 +avg_nanosec_per_negative_lookup_advanced 1282.28 +avg_nanosec_per_access 264.028 +iterator: avg_nanosec_per_kmer 13.6361 +avg_nanosec_per_positive_lookup 1021.09 +avg_nanosec_per_negative_lookup 1309.98 +avg_nanosec_per_positive_lookup_advanced 1004.92 +avg_nanosec_per_negative_lookup_advanced 1302.81 
+avg_nanosec_per_access 262.37 +iterator: avg_nanosec_per_kmer 13.9982 +avg_nanosec_per_positive_lookup 1390.89 +avg_nanosec_per_negative_lookup 1620.25 +avg_nanosec_per_positive_lookup_advanced 1369.7 +avg_nanosec_per_negative_lookup_advanced 1600.59 +avg_nanosec_per_access 368.619 +iterator: avg_nanosec_per_kmer 13.5873 +avg_nanosec_per_positive_lookup 1390.01 +avg_nanosec_per_negative_lookup 1612.37 +avg_nanosec_per_positive_lookup_advanced 1392.58 +avg_nanosec_per_negative_lookup_advanced 1603.12 +avg_nanosec_per_access 370.64 +iterator: avg_nanosec_per_kmer 13.5535 +avg_nanosec_per_positive_lookup 1376.73 +avg_nanosec_per_negative_lookup 1623.33 +avg_nanosec_per_positive_lookup_advanced 1367.13 +avg_nanosec_per_negative_lookup_advanced 1597.43 +avg_nanosec_per_access 370.045 +iterator: avg_nanosec_per_kmer 13.5485 +avg_nanosec_per_positive_lookup 826.225 +avg_nanosec_per_negative_lookup 1075.4 +avg_nanosec_per_positive_lookup_advanced 805.128 +avg_nanosec_per_negative_lookup_advanced 1052.47 +avg_nanosec_per_access 287.705 +iterator: avg_nanosec_per_kmer 13.615 +avg_nanosec_per_positive_lookup 805.683 +avg_nanosec_per_negative_lookup 1071.25 +avg_nanosec_per_positive_lookup_advanced 809.982 +avg_nanosec_per_negative_lookup_advanced 1055.7 +avg_nanosec_per_access 282.47 +iterator: avg_nanosec_per_kmer 13.8545 +avg_nanosec_per_positive_lookup 824.257 +avg_nanosec_per_negative_lookup 1068.3 +avg_nanosec_per_positive_lookup_advanced 830.463 +avg_nanosec_per_negative_lookup_advanced 1066.87 +avg_nanosec_per_access 282.154 +iterator: avg_nanosec_per_kmer 13.8504 +avg_nanosec_per_positive_lookup 1482.44 +avg_nanosec_per_negative_lookup 1615.74 +avg_nanosec_per_positive_lookup_advanced 1446.03 +avg_nanosec_per_negative_lookup_advanced 1601.59 +avg_nanosec_per_access 388.119 +iterator: avg_nanosec_per_kmer 14.0195 +avg_nanosec_per_positive_lookup 1475.55 +avg_nanosec_per_negative_lookup 1612.43 +avg_nanosec_per_positive_lookup_advanced 1449.14 
+avg_nanosec_per_negative_lookup_advanced 1590.2 +avg_nanosec_per_access 389.131 +iterator: avg_nanosec_per_kmer 14.2169 +avg_nanosec_per_positive_lookup 1475.76 +avg_nanosec_per_negative_lookup 1628.75 +avg_nanosec_per_positive_lookup_advanced 1451.51 +avg_nanosec_per_negative_lookup_advanced 1585.93 +avg_nanosec_per_access 384.968 +iterator: avg_nanosec_per_kmer 14.0822 +avg_nanosec_per_positive_lookup 1951.83 +avg_nanosec_per_negative_lookup 2164.91 +avg_nanosec_per_positive_lookup_advanced 1970.19 +avg_nanosec_per_negative_lookup_advanced 2144.56 +avg_nanosec_per_access 600.742 +iterator: avg_nanosec_per_kmer 13.9579 +avg_nanosec_per_positive_lookup 1973.43 +avg_nanosec_per_negative_lookup 2128.58 +avg_nanosec_per_positive_lookup_advanced 1929.4 +avg_nanosec_per_negative_lookup_advanced 2127.99 +avg_nanosec_per_access 599.777 +iterator: avg_nanosec_per_kmer 13.6554 +avg_nanosec_per_positive_lookup 1985.53 +avg_nanosec_per_negative_lookup 2145.04 +avg_nanosec_per_positive_lookup_advanced 1941.35 +avg_nanosec_per_negative_lookup_advanced 2111.76 +avg_nanosec_per_access 595.081 +iterator: avg_nanosec_per_kmer 13.9286 diff --git a/benchmarks/results-27-11-25-v3/k31/regular-build.log b/benchmarks/results-27-11-25-v3/k31/regular-build.log new file mode 100644 index 0000000..21c3089 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/regular-build.log @@ -0,0 +1,1565 @@ +k = 31, m = 20, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 25039667 bases, 22039697 kmers +read 200000 sequences, 50140708 bases, 44140738 kmers +read 300000 sequences, 75429441 bases, 66429471 kmers +read 400000 sequences, 100861228 bases, 88861258 kmers +read 500000 sequences, 126668305 bases, 111668335 kmers +read 600000 sequences, 152842148 bases, 134842178 kmers +read 700000 sequences, 179047050 bases, 158047080 kmers +read 800000 sequences, 205700376 bases, 181700406 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277673890665311.minimizers.0.bin'... +read 900000 sequences, 232873950 bases, 205873980 kmers +read 1000000 sequences, 260757565 bases, 230757595 kmers +read 1100000 sequences, 290088622 bases, 257088652 kmers +read 1200000 sequences, 322579647 bases, 286579677 kmers +read 1300000 sequences, 361073626 bases, 322073656 kmers +read 1400000 sequences, 398961301 bases, 356961331 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277673890665311.minimizers.1.bin'... +read 1500000 sequences, 424322286 bases, 379322316 kmers +read 1600000 sequences, 449411932 bases, 401411962 kmers +read 1700000 sequences, 474428195 bases, 423428225 kmers +read 1800000 sequences, 499637061 bases, 445637091 kmers +read 1900000 sequences, 524718926 bases, 467718956 kmers +read 2000000 sequences, 549832029 bases, 489832059 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277673890665311.minimizers.2.bin'... +read 2057242 sequences, 564182460 bases, 502465200 kmers +num_kmers 502465200 +num_super_kmers 78977996 +num_pieces 2057243 (+0.245658 [bits/kmer]) +=== step 1: 'parse_file' 24.3023 [sec] (48.3661 [ns/kmer]) + == files to merge = 3 +num_written_tuples = 50000000 +num_written_tuples = 78977996 +num_minimizers 72949383 +building minimizers MPHF with 64 threads and 256 partitions... 
+=== step 2: 'build_minimizers' 4.67916 [sec] (9.31241 [ns/kmer]) +bits_per_offset = ceil(log2(564182491)) = 30 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277702952335794.bucket_pairs.0.bin'... +num_singletons 70664621/72949383 (96.868%) +=== step 3: 'build_index' 13.4491 [sec] (26.7663 [ns/kmer]) +max_num_super_kmers_in_bucket 15010 +log2_max_num_super_kmers_in_bucket 14 +num_buckets_in_skew_index 4233/72949383 (0.00580265%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 1256367 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 1008582 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 704270 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 516052 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 416157 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 408228 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 15010: 269630 +num_kmers_in_skew_index 4579286 (0.911364%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 1256367 + building MPHF with 64 threads and 62 partitions... + built mphs[0] for 1256367 keys; bits/key = 3.03817 + built positions[0] for 1256367 keys; bits/key = 7.0003 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 1008582 + building MPHF with 64 threads and 50 partitions... + built mphs[1] for 1008582 keys; bits/key = 2.9897 + built positions[1] for 1008582 keys; bits/key = 8.00033 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 704270 + building MPHF with 64 threads and 35 partitions... 
+ built mphs[2] for 704270 keys; bits/key = 2.97527 + built positions[2] for 704270 keys; bits/key = 9.00046 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 516052 + building MPHF with 64 threads and 25 partitions... + built mphs[3] for 516052 keys; bits/key = 2.95704 + built positions[3] for 516052 keys; bits/key = 10.0007 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 416157 + building MPHF with 64 threads and 20 partitions... + built mphs[4] for 416157 keys; bits/key = 2.93739 + built positions[4] for 416157 keys; bits/key = 11.0008 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 408228 + building MPHF with 64 threads and 20 partitions... + built mphs[5] for 408228 keys; bits/key = 2.96254 + built positions[5] for 408228 keys; bits/key = 12.0008 + lower 4096; upper 15010; num_bits_per_pos 14; keys_in_partition.size() 269630 + building MPHF with 64 threads and 13 partitions... + built mphs[6] for 269630 keys; bits/key = 2.9376 + built positions[6] for 269630 keys; bits/key = 14.0013 +num_bits_for_skew_index 55293728(0.110045 [bits/kmer]) +=== step 4: 'build_skew_index' 0.674765 [sec] (1.34291 [ns/kmer]) +=== total_time 43.1054 [sec] (85.7877 [ns/kmer]) +total index size: 486626776 [B] -- 486.627 [MB] +SPACE BREAKDOWN: + minimizers: 0.391835 [bits/kmer] (2.6989 [bits/key]) -- 5.05735% + pieces: 0.0460074 [bits/kmer] -- 0.59381% + num_super_kmers_before_bucket: 0.238849 [bits/kmer] -- 3.08278% + offsets: 4.71543 [bits/kmer] -- 60.8613% + strings: 2.24566 [bits/kmer] -- 28.9844% + skew_index: 0.110045 [bits/kmer] -- 1.42033% + weights: 2.92956e-06 [bits/kmer] -- 3.78113e-05% + weight_interval_values: 5.09488e-07 [bits/kmer] + weight_interval_lengths: 1.91058e-06 [bits/kmer] + weight_dictionary: 5.09488e-07 [bits/kmer] + -------------- + total: 7.74783 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 96.868% +buckets with 2 super_kmers = 2.00603% +buckets with 3 
super_kmers = 0.519181% +buckets with 4 super_kmers = 0.211233% +buckets with 5 super_kmers = 0.109983% +buckets with 6 super_kmers = 0.0659978% +buckets with 7 super_kmers = 0.0442074% +buckets with 8 super_kmers = 0.0308885% +buckets with 9 super_kmers = 0.0232874% +buckets with 10 super_kmers = 0.0180426% +buckets with 11 super_kmers = 0.0139809% +buckets with 12 super_kmers = 0.0113819% +buckets with 13 super_kmers = 0.00931879% +buckets with 14 super_kmers = 0.00762721% +buckets with 15 super_kmers = 0.00619882% +buckets with 16 super_kmers = 0.00540923% +max_num_super_kmers_in_bucket 15010 +2025-11-27 22:08:37: saving data structure to disk... +2025-11-27 22:08:37: DONE +k = 31, m = 20, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz'... +m_buffer_size 29411764 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.0.bin'... +read 100000 sequences, 213090615 bases, 210090645 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.1.bin'... +read 200000 sequences, 390706022 bases, 384706052 kmers +read 300000 sequences, 575071881 bases, 566071911 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.2.bin'... +read 400000 sequences, 764532455 bases, 752532485 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.3.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.4.bin'... +read 500000 sequences, 971034152 bases, 956034182 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.5.bin'... +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277717467247535.minimizers.6.bin'... +read 582860 sequences, 1167885005 bases, 1150399205 kmers +num_kmers 1150399205 +num_super_kmers 177472930 +num_pieces 582861 (+0.0303996 [bits/kmer]) +=== step 1: 'parse_file' 54.5614 [sec] (47.4282 [ns/kmer]) + == files to merge = 7 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 177472930 +num_minimizers 174750712 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 12.0222 [sec] (10.4504 [ns/kmer]) +bits_per_offset = ceil(log2(1167885036)) = 31 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277784235171881.bucket_pairs.0.bin'... +num_singletons 173424689/174750712 (99.2412%) +=== step 3: 'build_index' 42.3889 [sec] (36.8472 [ns/kmer]) +max_num_super_kmers_in_bucket 3874 +log2_max_num_super_kmers_in_bucket 12 +num_buckets_in_skew_index 1487/174750712 (0.000850926%) +num_partitions 6 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 534745 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 306737 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 198082 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 68926 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 15246 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 3874: 38941 +num_kmers_in_skew_index 1162677 (0.101067%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 534745 + building MPHF with 64 threads and 26 partitions... 
+ built mphs[0] for 534745 keys; bits/key = 2.94929 + built positions[0] for 534745 keys; bits/key = 7.00063 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 306737 + building MPHF with 64 threads and 15 partitions... + built mphs[1] for 306737 keys; bits/key = 2.93166 + built positions[1] for 306737 keys; bits/key = 8.00123 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 198082 + building MPHF with 64 threads and 9 partitions... + built mphs[2] for 198082 keys; bits/key = 2.88551 + built positions[2] for 198082 keys; bits/key = 9.00185 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 68926 + building MPHF with 64 threads and 3 partitions... + built mphs[3] for 68926 keys; bits/key = 2.89563 + built positions[3] for 68926 keys; bits/key = 10.0049 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 15246 + building MPHF with 64 threads and 1 partitions... + built mphs[4] for 15246 keys; bits/key = 2.96681 + built positions[4] for 15246 keys; bits/key = 11.0235 + lower 2048; upper 3874; num_bits_per_pos 12; keys_in_partition.size() 38941 + building MPHF with 64 threads and 1 partitions... 
+ built mphs[5] for 38941 keys; bits/key = 2.74056 + built positions[5] for 38941 keys; bits/key = 12.0091 +num_bits_for_skew_index 12705904(0.0110448 [bits/kmer]) +=== step 4: 'build_skew_index' 0.726832 [sec] (0.631809 [ns/kmer]) +=== total_time 109.699 [sec] (95.3576 [ns/kmer]) +total index size: 1074747844 [B] -- 1074.75 [MB] +SPACE BREAKDOWN: + minimizers: 0.402947 [bits/kmer] (2.65264 [bits/key]) -- 5.39138% + pieces: 0.00740836 [bits/kmer] -- 0.099123% + num_super_kmers_before_bucket: 0.239718 [bits/kmer] -- 3.20739% + offsets: 4.78239 [bits/kmer] -- 63.9878% + strings: 2.0304 [bits/kmer] -- 27.1665% + skew_index: 0.0110448 [bits/kmer] -- 0.147778% + weights: 1.27956e-06 [bits/kmer] -- 1.71203e-05% + weight_interval_values: 2.22531e-07 [bits/kmer] + weight_interval_lengths: 8.34493e-07 [bits/kmer] + weight_dictionary: 2.22531e-07 [bits/kmer] + -------------- + total: 7.47391 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 99.2412% +buckets with 2 super_kmers = 0.572493% +buckets with 3 super_kmers = 0.083761% +buckets with 4 super_kmers = 0.0344628% +buckets with 5 super_kmers = 0.0184577% +buckets with 6 super_kmers = 0.0113899% +buckets with 7 super_kmers = 0.00777565% +buckets with 8 super_kmers = 0.00535448% +buckets with 9 super_kmers = 0.00398053% +buckets with 10 super_kmers = 0.0029791% +buckets with 11 super_kmers = 0.00242917% +buckets with 12 super_kmers = 0.00197024% +buckets with 13 super_kmers = 0.00158855% +buckets with 14 super_kmers = 0.00135393% +buckets with 15 super_kmers = 0.00107811% +buckets with 16 super_kmers = 0.000880111% +max_num_super_kmers_in_bucket 3874 +2025-11-27 22:10:27: saving data structure to disk... +2025-11-27 22:10:27: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 24154188 bases, 21154218 kmers +read 200000 sequences, 48616692 bases, 42616722 kmers +read 300000 sequences, 73131027 bases, 64131057 kmers +read 400000 sequences, 97783723 bases, 85783753 kmers +read 500000 sequences, 122219519 bases, 107219549 kmers +read 600000 sequences, 146714842 bases, 128714872 kmers +read 700000 sequences, 171233673 bases, 150233703 kmers +read 800000 sequences, 195693297 bases, 171693327 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.0.bin'... +read 900000 sequences, 220477596 bases, 193477626 kmers +read 1000000 sequences, 245136480 bases, 215136510 kmers +read 1100000 sequences, 269861488 bases, 236861518 kmers +read 1200000 sequences, 294553400 bases, 258553430 kmers +read 1300000 sequences, 319281499 bases, 280281529 kmers +read 1400000 sequences, 344258826 bases, 302258856 kmers +read 1500000 sequences, 368900321 bases, 323900351 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.1.bin'... +read 1600000 sequences, 393909637 bases, 345909667 kmers +read 1700000 sequences, 418576764 bases, 367576794 kmers +read 1800000 sequences, 443411012 bases, 389411042 kmers +read 1900000 sequences, 468034254 bases, 411034284 kmers +read 2000000 sequences, 492922181 bases, 432922211 kmers +read 2100000 sequences, 517852856 bases, 454852886 kmers +read 2200000 sequences, 542402258 bases, 476402288 kmers +read 2300000 sequences, 567260857 bases, 498260887 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.2.bin'... 
+read 2400000 sequences, 592478462 bases, 520478492 kmers +read 2500000 sequences, 617295027 bases, 542295057 kmers +read 2600000 sequences, 642487995 bases, 564488025 kmers +read 2700000 sequences, 667681489 bases, 586681519 kmers +read 2800000 sequences, 693019221 bases, 609019251 kmers +read 2900000 sequences, 718419082 bases, 631419112 kmers +read 3000000 sequences, 743136645 bases, 653136675 kmers +read 3100000 sequences, 768135598 bases, 675135628 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.3.bin'... +read 3200000 sequences, 793535023 bases, 697535053 kmers +read 3300000 sequences, 819156616 bases, 720156646 kmers +read 3400000 sequences, 844256779 bases, 742256809 kmers +read 3500000 sequences, 869741880 bases, 764741910 kmers +read 3600000 sequences, 895152151 bases, 787152181 kmers +read 3700000 sequences, 920884314 bases, 809884344 kmers +read 3800000 sequences, 946277252 bases, 832277282 kmers +read 3900000 sequences, 972103084 bases, 855103114 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.4.bin'... +read 4000000 sequences, 997901794 bases, 877901824 kmers +read 4100000 sequences, 1023962565 bases, 900962595 kmers +read 4200000 sequences, 1050002905 bases, 924002935 kmers +read 4300000 sequences, 1076025926 bases, 947025956 kmers +read 4400000 sequences, 1101901550 bases, 969901580 kmers +read 4500000 sequences, 1127998210 bases, 992998240 kmers +read 4600000 sequences, 1153713252 bases, 1015713282 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.5.bin'... 
+read 4700000 sequences, 1179840867 bases, 1038840897 kmers +read 4800000 sequences, 1205900933 bases, 1061900963 kmers +read 4900000 sequences, 1232271094 bases, 1085271124 kmers +read 5000000 sequences, 1259012297 bases, 1109012327 kmers +read 5100000 sequences, 1285390407 bases, 1132390437 kmers +read 5200000 sequences, 1312574199 bases, 1156574229 kmers +read 5300000 sequences, 1339714447 bases, 1180714477 kmers +read 5400000 sequences, 1366712530 bases, 1204712560 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.6.bin'... +read 5500000 sequences, 1394310431 bases, 1229310461 kmers +read 5600000 sequences, 1421869864 bases, 1253869894 kmers +read 5700000 sequences, 1449547608 bases, 1278547638 kmers +read 5800000 sequences, 1477685978 bases, 1303686008 kmers +read 5900000 sequences, 1505662869 bases, 1328662899 kmers +read 6000000 sequences, 1534165192 bases, 1354165222 kmers +read 6100000 sequences, 1562404847 bases, 1379404877 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.7.bin'... +read 6200000 sequences, 1591527035 bases, 1405527065 kmers +read 6300000 sequences, 1620530378 bases, 1431530408 kmers +read 6400000 sequences, 1650356135 bases, 1458356165 kmers +read 6500000 sequences, 1680100604 bases, 1485100634 kmers +read 6600000 sequences, 1709838161 bases, 1511838191 kmers +read 6700000 sequences, 1739768824 bases, 1538768854 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.8.bin'... 
+read 6800000 sequences, 1771033237 bases, 1567033267 kmers +read 6900000 sequences, 1802734155 bases, 1595734185 kmers +read 7000000 sequences, 1835088122 bases, 1625088152 kmers +read 7100000 sequences, 1868202990 bases, 1655203020 kmers +read 7200000 sequences, 1901851887 bases, 1685851917 kmers +read 7300000 sequences, 1936102636 bases, 1717102666 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.9.bin'... +read 7400000 sequences, 1971764013 bases, 1749764043 kmers +read 7500000 sequences, 2008379618 bases, 1783379648 kmers +read 7600000 sequences, 2046720278 bases, 1818720308 kmers +read 7700000 sequences, 2086661084 bases, 1855661114 kmers +read 7800000 sequences, 2129062919 bases, 1895062949 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.10.bin'... +read 7900000 sequences, 2174787944 bases, 1937787974 kmers +read 8000000 sequences, 2224822145 bases, 1984822175 kmers +read 8100000 sequences, 2283235897 bases, 2040235927 kmers +read 8200000 sequences, 2311905284 bases, 2065905314 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.11.bin'... +read 8300000 sequences, 2336057180 bases, 2087057210 kmers +read 8400000 sequences, 2360509664 bases, 2108509694 kmers +read 8500000 sequences, 2384831256 bases, 2129831286 kmers +read 8600000 sequences, 2409417232 bases, 2151417262 kmers +read 8700000 sequences, 2433616800 bases, 2172616830 kmers +read 8800000 sequences, 2458186051 bases, 2194186081 kmers +read 8900000 sequences, 2482411631 bases, 2215411661 kmers +read 9000000 sequences, 2506972327 bases, 2236972357 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.12.bin'... 
+read 9100000 sequences, 2531678245 bases, 2258678275 kmers +read 9200000 sequences, 2555989557 bases, 2279989587 kmers +read 9300000 sequences, 2580569574 bases, 2301569604 kmers +read 9400000 sequences, 2605362813 bases, 2323362843 kmers +read 9500000 sequences, 2629786923 bases, 2344786953 kmers +read 9600000 sequences, 2654330633 bases, 2366330663 kmers +read 9700000 sequences, 2679002198 bases, 2388002228 kmers +read 9800000 sequences, 2703471555 bases, 2409471585 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.13.bin'... +read 9900000 sequences, 2727977401 bases, 2430977431 kmers +read 10000000 sequences, 2751909350 bases, 2451909380 kmers +read 10100000 sequences, 2776332527 bases, 2473332557 kmers +read 10200000 sequences, 2800726002 bases, 2494726032 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277828104044627.minimizers.14.bin'... +read 10250465 sequences, 2813192630 bases, 2505678680 kmers +num_kmers 2505678680 +num_super_kmers 426160817 +num_pieces 10250466 (+0.245454 [bits/kmer]) +=== step 1: 'parse_file' 121.363 [sec] (48.4351 [ns/kmer]) + == files to merge = 15 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 426160817 +num_minimizers 389546619 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 31.7936 [sec] (12.6886 [ns/kmer]) +bits_per_offset = ceil(log2(2813192661)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764277981737441322.bucket_pairs.0.bin'... 
+num_singletons 378612640/389546619 (97.1932%) +=== step 3: 'build_index' 110.178 [sec] (43.9712 [ns/kmer]) +max_num_super_kmers_in_bucket 33466 +log2_max_num_super_kmers_in_bucket 16 +num_buckets_in_skew_index 42800/389546619 (0.0109871%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 11962893 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 8358542 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 5476185 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 2997306 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 1840963 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1003460 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 33466: 777233 +num_kmers_in_skew_index 32416582 (1.29372%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 11962893 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 11962893 keys; bits/key = 2.99042 + built positions[0] for 11962893 keys; bits/key = 7.00003 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 8358542 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 8358542 keys; bits/key = 3.02981 + built positions[1] for 8358542 keys; bits/key = 8.00004 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 5476185 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 5476185 keys; bits/key = 3.18449 + built positions[2] for 5476185 keys; bits/key = 9.00006 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 2997306 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[3] for 2997306 keys; bits/key = 3.34393 + built positions[3] for 2997306 keys; bits/key = 10.0001 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 1840963 + building MPHF with 64 threads and 92 partitions... + built mphs[4] for 1840963 keys; bits/key = 3.12381 + built positions[4] for 1840963 keys; bits/key = 11.0002 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 1003460 + building MPHF with 64 threads and 50 partitions... + built mphs[5] for 1003460 keys; bits/key = 3.0005 + built positions[5] for 1003460 keys; bits/key = 12.0003 + lower 4096; upper 33466; num_bits_per_pos 16; keys_in_partition.size() 777233 + building MPHF with 64 threads and 38 partitions... + built mphs[6] for 777233 keys; bits/key = 2.97284 + built positions[6] for 777233 keys; bits/key = 16.0005 +num_bits_for_skew_index 374230592(0.149353 [bits/kmer]) +=== step 4: 'build_skew_index' 4.45634 [sec] (1.77849 [ns/kmer]) +=== total_time 267.79 [sec] (106.873 [ns/kmer]) +total index size: 2680470314 [B] -- 2680.47 [MB] +SPACE BREAKDOWN: + minimizers: 0.417262 [bits/kmer] (2.68395 [bits/key]) -- 4.87566% + pieces: 0.0459722 [bits/kmer] -- 0.53718% + num_super_kmers_before_bucket: 0.257528 [bits/kmer] -- 3.00918% + offsets: 5.4425 [bits/kmer] -- 63.5949% + strings: 2.24545 [bits/kmer] -- 26.2379% + skew_index: 0.149353 [bits/kmer] -- 1.74517% + weights: 5.87466e-07 [bits/kmer] -- 6.86447e-06% + weight_interval_values: 1.02168e-07 [bits/kmer] + weight_interval_lengths: 3.8313e-07 [bits/kmer] + weight_dictionary: 1.02168e-07 [bits/kmer] + -------------- + total: 8.55807 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 97.1932% +buckets with 2 super_kmers = 1.69307% +buckets with 3 super_kmers = 0.441852% +buckets with 4 super_kmers = 0.200951% +buckets with 5 super_kmers = 0.114315% +buckets with 6 super_kmers = 0.0738004% +buckets with 7 super_kmers = 0.0506224% +buckets with 8 super_kmers = 0.036724% +buckets 
with 9 super_kmers = 0.0279258% +buckets with 10 super_kmers = 0.0217155% +buckets with 11 super_kmers = 0.0172904% +buckets with 12 super_kmers = 0.0140499% +buckets with 13 super_kmers = 0.0117175% +buckets with 14 super_kmers = 0.00980242% +buckets with 15 super_kmers = 0.00828168% +buckets with 16 super_kmers = 0.00715729% +max_num_super_kmers_in_bucket 33466 +2025-11-27 22:14:56: saving data structure to disk... +2025-11-27 22:14:57: DONE +k = 31, m = 19, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 25244517 bases, 22244547 kmers +read 200000 sequences, 51983063 bases, 45983093 kmers +read 300000 sequences, 83502239 bases, 74502269 kmers +read 400000 sequences, 125957954 bases, 113957984 kmers +read 500000 sequences, 196376539 bases, 181376569 kmers +read 600000 sequences, 214563709 bases, 196563739 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278098218425720.minimizers.0.bin'... +read 700000 sequences, 233311986 bases, 212312016 kmers +read 800000 sequences, 251082581 bases, 227082611 kmers +read 900000 sequences, 269982481 bases, 242982511 kmers +read 1000000 sequences, 289026734 bases, 259026764 kmers +read 1100000 sequences, 308418289 bases, 275418319 kmers +read 1200000 sequences, 328185212 bases, 292185242 kmers +read 1300000 sequences, 349175412 bases, 310175442 kmers +read 1400000 sequences, 370064780 bases, 328064810 kmers +read 1500000 sequences, 391112878 bases, 346112908 kmers +read 1600000 sequences, 413509635 bases, 365509665 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278098218425720.minimizers.1.bin'... 
+read 1645464 sequences, 425569105 bases, 376205185 kmers +num_kmers 376205185 +num_super_kmers 55151083 +num_pieces 1645465 (+0.262431 [bits/kmer]) +=== step 1: 'parse_file' 20.8594 [sec] (55.4469 [ns/kmer]) + == files to merge = 2 +num_written_tuples = 50000000 +num_written_tuples = 55151083 +num_minimizers 52250663 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 3.27533 [sec] (8.70623 [ns/kmer]) +bits_per_offset = ceil(log2(425569136)) = 29 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278122406467019.bucket_pairs.0.bin'... +num_singletons 49978705/52250663 (95.6518%) +=== step 3: 'build_index' 8.66828 [sec] (23.0414 [ns/kmer]) +max_num_super_kmers_in_bucket 239 +log2_max_num_super_kmers_in_bucket 8 +num_buckets_in_skew_index 19/52250663 (3.63632e-05%) +num_partitions 2 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 8330 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 239: 5535 +num_kmers_in_skew_index 13865 (0.00368549%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 8330 + building MPHF with 64 threads and 1 partitions... + built mphs[0] for 8330 keys; bits/key = 3.40552 + built positions[0] for 8330 keys; bits/key = 7.04538 + lower 128; upper 239; num_bits_per_pos 8; keys_in_partition.size() 5535 + building MPHF with 64 threads and 1 partitions... 
+ built mphs[1] for 5535 keys; bits/key = 3.5729 + built positions[1] for 5535 keys; bits/key = 8.05926 +num_bits_for_skew_index 151632(0.000403057 [bits/kmer]) +=== step 4: 'build_skew_index' 0.21561 [sec] (0.573118 [ns/kmer]) +=== total_time 33.0186 [sec] (87.7676 [ns/kmer]) +total index size: 337189658 [B] -- 337.19 [MB] +SPACE BREAKDOWN: + minimizers: 0.382683 [bits/kmer] (2.75532 [bits/key]) -- 5.33702% + pieces: 0.0487325 [bits/kmer] -- 0.679641% + num_super_kmers_before_bucket: 0.224726 [bits/kmer] -- 3.13411% + offsets: 4.25135 [bits/kmer] -- 59.2909% + strings: 2.26243 [bits/kmer] -- 31.5527% + skew_index: 0.000403057 [bits/kmer] -- 0.00562117% + weights: 3.91276e-06 [bits/kmer] -- 5.45687e-05% + weight_interval_values: 6.8048e-07 [bits/kmer] + weight_interval_lengths: 2.5518e-06 [bits/kmer] + weight_dictionary: 6.8048e-07 [bits/kmer] + -------------- + total: 7.17034 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 95.6518% +buckets with 2 super_kmers = 3.56262% +buckets with 3 super_kmers = 0.561857% +buckets with 4 super_kmers = 0.137876% +buckets with 5 super_kmers = 0.0462769% +buckets with 6 super_kmers = 0.0191175% +buckets with 7 super_kmers = 0.00872525% +buckets with 8 super_kmers = 0.00430234% +buckets with 9 super_kmers = 0.00245356% +buckets with 10 super_kmers = 0.0014373% +buckets with 11 super_kmers = 0.000855492% +buckets with 12 super_kmers = 0.00061626% +buckets with 13 super_kmers = 0.000449755% +buckets with 14 super_kmers = 0.000266025% +buckets with 15 super_kmers = 0.000218179% +buckets with 16 super_kmers = 0.000126314% +max_num_super_kmers_in_bucket 239 +2025-11-27 22:15:31: saving data structure to disk... +2025-11-27 22:15:31: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 8718376 bases, 5718406 kmers +read 200000 sequences, 17474036 bases, 11474066 kmers +read 300000 sequences, 26299388 bases, 17299418 kmers +read 400000 sequences, 35167021 bases, 23167051 kmers +read 500000 sequences, 43967859 bases, 28967889 kmers +read 600000 sequences, 52886411 bases, 34886441 kmers +read 700000 sequences, 61937165 bases, 40937195 kmers +read 800000 sequences, 71070675 bases, 47070705 kmers +read 900000 sequences, 80176820 bases, 53176850 kmers +read 1000000 sequences, 89525814 bases, 59525844 kmers +read 1100000 sequences, 98984767 bases, 65984797 kmers +read 1200000 sequences, 108719822 bases, 72719852 kmers +read 1300000 sequences, 118280750 bases, 79280780 kmers +read 1400000 sequences, 127917709 bases, 85917739 kmers +read 1500000 sequences, 137591502 bases, 92591532 kmers +read 1600000 sequences, 147395162 bases, 99395192 kmers +read 1700000 sequences, 157334953 bases, 106334983 kmers +read 1800000 sequences, 167444668 bases, 113444698 kmers +read 1900000 sequences, 177725512 bases, 120725542 kmers +read 2000000 sequences, 188052017 bases, 128052047 kmers +read 2100000 sequences, 198499489 bases, 135499519 kmers +read 2200000 sequences, 209053217 bases, 143053247 kmers +read 2300000 sequences, 219847953 bases, 150847983 kmers +read 2400000 sequences, 230787134 bases, 158787164 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.0.bin'... 
+read 2500000 sequences, 242014317 bases, 167014347 kmers +read 2600000 sequences, 253501939 bases, 175501969 kmers +read 2700000 sequences, 265108629 bases, 184108659 kmers +read 2800000 sequences, 277040099 bases, 193040129 kmers +read 2900000 sequences, 289406610 bases, 202406640 kmers +read 3000000 sequences, 302142147 bases, 212142177 kmers +read 3100000 sequences, 315168399 bases, 222168429 kmers +read 3200000 sequences, 329083022 bases, 233083052 kmers +read 3300000 sequences, 343507959 bases, 244507989 kmers +read 3400000 sequences, 358607940 bases, 256607970 kmers +read 3500000 sequences, 374790281 bases, 269790311 kmers +read 3600000 sequences, 392123240 bases, 284123270 kmers +read 3700000 sequences, 410698110 bases, 299698140 kmers +read 3800000 sequences, 431358012 bases, 317358042 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.1.bin'... +read 3900000 sequences, 454179419 bases, 337179449 kmers +read 4000000 sequences, 461236464 bases, 341236494 kmers +read 4100000 sequences, 468031488 bases, 345031518 kmers +read 4200000 sequences, 474889537 bases, 348889567 kmers +read 4300000 sequences, 481729647 bases, 352729677 kmers +read 4400000 sequences, 488721923 bases, 356721953 kmers +read 4500000 sequences, 495625017 bases, 360625047 kmers +read 4600000 sequences, 502596529 bases, 364596559 kmers +read 4700000 sequences, 509600468 bases, 368600498 kmers +read 4800000 sequences, 516484409 bases, 372484439 kmers +read 4900000 sequences, 523427172 bases, 376427202 kmers +read 5000000 sequences, 530371696 bases, 380371726 kmers +read 5100000 sequences, 537329533 bases, 384329563 kmers +read 5200000 sequences, 544209307 bases, 388209337 kmers +read 5300000 sequences, 551189884 bases, 392189914 kmers +read 5400000 sequences, 558101859 bases, 396101889 kmers +read 5500000 sequences, 565120102 bases, 400120132 kmers +read 5600000 sequences, 572157514 bases, 404157544 kmers +read 5700000 
sequences, 579064805 bases, 408064835 kmers +read 5800000 sequences, 586002650 bases, 412002680 kmers +read 5900000 sequences, 592940870 bases, 415940900 kmers +read 6000000 sequences, 600036436 bases, 420036466 kmers +read 6100000 sequences, 607035741 bases, 424035771 kmers +read 6200000 sequences, 614208655 bases, 428208685 kmers +read 6300000 sequences, 621168185 bases, 432168215 kmers +read 6400000 sequences, 628072415 bases, 436072445 kmers +read 6500000 sequences, 635102553 bases, 440102583 kmers +read 6600000 sequences, 642120687 bases, 444120717 kmers +read 6700000 sequences, 649088919 bases, 448088949 kmers +read 6800000 sequences, 656101716 bases, 452101746 kmers +read 6900000 sequences, 663148824 bases, 456148854 kmers +read 7000000 sequences, 670072415 bases, 460072445 kmers +read 7100000 sequences, 677027793 bases, 464027823 kmers +read 7200000 sequences, 684172149 bases, 468172179 kmers +read 7300000 sequences, 691211136 bases, 472211166 kmers +read 7400000 sequences, 698208759 bases, 476208789 kmers +read 7500000 sequences, 705328918 bases, 480328948 kmers +read 7600000 sequences, 712360901 bases, 484360931 kmers +read 7700000 sequences, 719471743 bases, 488471773 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.2.bin'... 
+read 7800000 sequences, 726605082 bases, 492605112 kmers +read 7900000 sequences, 733774553 bases, 496774583 kmers +read 8000000 sequences, 740830630 bases, 500830660 kmers +read 8100000 sequences, 747950142 bases, 504950172 kmers +read 8200000 sequences, 755058699 bases, 509058729 kmers +read 8300000 sequences, 762216366 bases, 513216396 kmers +read 8400000 sequences, 769353795 bases, 517353825 kmers +read 8500000 sequences, 776583104 bases, 521583134 kmers +read 8600000 sequences, 783816161 bases, 525816191 kmers +read 8700000 sequences, 791077309 bases, 530077339 kmers +read 8800000 sequences, 798157720 bases, 534157750 kmers +read 8900000 sequences, 805240984 bases, 538241014 kmers +read 9000000 sequences, 812530422 bases, 542530452 kmers +read 9100000 sequences, 819617660 bases, 546617690 kmers +read 9200000 sequences, 826809679 bases, 550809709 kmers +read 9300000 sequences, 833919076 bases, 554919106 kmers +read 9400000 sequences, 841155631 bases, 559155661 kmers +read 9500000 sequences, 848401634 bases, 563401664 kmers +read 9600000 sequences, 855643616 bases, 567643646 kmers +read 9700000 sequences, 862872646 bases, 571872676 kmers +read 9800000 sequences, 870171662 bases, 576171692 kmers +read 9900000 sequences, 877460842 bases, 580460872 kmers +read 10000000 sequences, 884692117 bases, 584692147 kmers +read 10100000 sequences, 891988881 bases, 588988911 kmers +read 10200000 sequences, 899332113 bases, 593332143 kmers +read 10300000 sequences, 906596087 bases, 597596117 kmers +read 10400000 sequences, 913914165 bases, 601914195 kmers +read 10500000 sequences, 921197132 bases, 606197162 kmers +read 10600000 sequences, 928621300 bases, 610621330 kmers +read 10700000 sequences, 935962513 bases, 614962543 kmers +read 10800000 sequences, 943225364 bases, 619225394 kmers +read 10900000 sequences, 950606220 bases, 623606250 kmers +read 11000000 sequences, 958222210 bases, 628222240 kmers +read 11100000 sequences, 965609577 bases, 632609607 kmers +read 11200000 
sequences, 972999540 bases, 636999570 kmers +read 11300000 sequences, 980385943 bases, 641385973 kmers +read 11400000 sequences, 987756659 bases, 645756689 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.3.bin'... +read 11500000 sequences, 995228579 bases, 650228609 kmers +read 11600000 sequences, 1002715916 bases, 654715946 kmers +read 11700000 sequences, 1010163181 bases, 659163211 kmers +read 11800000 sequences, 1017603999 bases, 663604029 kmers +read 11900000 sequences, 1025150283 bases, 668150313 kmers +read 12000000 sequences, 1032736012 bases, 672736042 kmers +read 12100000 sequences, 1040213462 bases, 677213492 kmers +read 12200000 sequences, 1047864710 bases, 681864740 kmers +read 12300000 sequences, 1055370919 bases, 686370949 kmers +read 12400000 sequences, 1062882101 bases, 690882131 kmers +read 12500000 sequences, 1070457753 bases, 695457783 kmers +read 12600000 sequences, 1078056452 bases, 700056482 kmers +read 12700000 sequences, 1085619838 bases, 704619868 kmers +read 12800000 sequences, 1093220312 bases, 709220342 kmers +read 12900000 sequences, 1100937956 bases, 713937986 kmers +read 13000000 sequences, 1108501138 bases, 718501168 kmers +read 13100000 sequences, 1116117632 bases, 723117662 kmers +read 13200000 sequences, 1123791900 bases, 727791930 kmers +read 13300000 sequences, 1131456231 bases, 732456261 kmers +read 13400000 sequences, 1139167200 bases, 737167230 kmers +read 13500000 sequences, 1147084189 bases, 742084219 kmers +read 13600000 sequences, 1154903385 bases, 746903415 kmers +read 13700000 sequences, 1162719538 bases, 751719568 kmers +read 13800000 sequences, 1170515131 bases, 756515161 kmers +read 13900000 sequences, 1178310843 bases, 761310873 kmers +read 14000000 sequences, 1186158447 bases, 766158477 kmers +read 14100000 sequences, 1194143426 bases, 771143456 kmers +read 14200000 sequences, 1202037098 bases, 776037128 kmers +read 14300000 sequences, 
1210016683 bases, 781016713 kmers +read 14400000 sequences, 1217929646 bases, 785929676 kmers +read 14500000 sequences, 1226009490 bases, 791009520 kmers +read 14600000 sequences, 1233936479 bases, 795936509 kmers +read 14700000 sequences, 1241953750 bases, 800953780 kmers +read 14800000 sequences, 1249971870 bases, 805971900 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.4.bin'... +read 14900000 sequences, 1258011905 bases, 811011935 kmers +read 15000000 sequences, 1266102845 bases, 816102875 kmers +read 15100000 sequences, 1274149642 bases, 821149672 kmers +read 15200000 sequences, 1282474792 bases, 826474822 kmers +read 15300000 sequences, 1290768894 bases, 831768924 kmers +read 15400000 sequences, 1299121736 bases, 837121766 kmers +read 15500000 sequences, 1307304137 bases, 842304167 kmers +read 15600000 sequences, 1315661695 bases, 847661725 kmers +read 15700000 sequences, 1324089803 bases, 853089833 kmers +read 15800000 sequences, 1332318556 bases, 858318586 kmers +read 15900000 sequences, 1340760519 bases, 863760549 kmers +read 16000000 sequences, 1349263730 bases, 869263760 kmers +read 16100000 sequences, 1357811657 bases, 874811687 kmers +read 16200000 sequences, 1366358116 bases, 880358146 kmers +read 16300000 sequences, 1375026989 bases, 886027019 kmers +read 16400000 sequences, 1383710154 bases, 891710184 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278131588822619.minimizers.5.bin'... 
+read 16440873 sequences, 1387536274 bases, 894310084 kmers +num_kmers 894310084 +num_super_kmers 162771744 +num_pieces 16440874 (+1.10303 [bits/kmer]) +=== step 1: 'parse_file' 46.7945 [sec] (52.3247 [ns/kmer]) + == files to merge = 6 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 162771744 +num_minimizers 126769732 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 10.0352 [sec] (11.2212 [ns/kmer]) +bits_per_offset = ceil(log2(1387536305)) = 31 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278188592687083.bucket_pairs.0.bin'... +num_singletons 112630701/126769732 (88.8467%) +=== step 3: 'build_index' 31.4503 [sec] (35.1671 [ns/kmer]) +max_num_super_kmers_in_bucket 51789 +log2_max_num_super_kmers_in_bucket 16 +num_buckets_in_skew_index 8370/126769732 (0.00660252%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 2275161 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 1223509 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 863425 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 648516 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 426533 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 398117 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 51789: 631711 +num_kmers_in_skew_index 6466972 (0.723124%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 2275161 + building MPHF with 64 threads and 113 partitions... 
+ built mphs[0] for 2275161 keys; bits/key = 3.14493 + built positions[0] for 2275161 keys; bits/key = 7.00015 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 1223509 + building MPHF with 64 threads and 61 partitions... + built mphs[1] for 1223509 keys; bits/key = 3.02551 + built positions[1] for 1223509 keys; bits/key = 8.00028 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 863425 + building MPHF with 64 threads and 43 partitions... + built mphs[2] for 863425 keys; bits/key = 2.98758 + built positions[2] for 863425 keys; bits/key = 9.00043 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 648516 + building MPHF with 64 threads and 32 partitions... + built mphs[3] for 648516 keys; bits/key = 2.97413 + built positions[3] for 648516 keys; bits/key = 10.0005 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 426533 + building MPHF with 64 threads and 21 partitions... + built mphs[4] for 426533 keys; bits/key = 2.95521 + built positions[4] for 426533 keys; bits/key = 11.0008 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 398117 + building MPHF with 64 threads and 19 partitions... + built mphs[5] for 398117 keys; bits/key = 2.92545 + built positions[5] for 398117 keys; bits/key = 12.0008 + lower 4096; upper 51789; num_bits_per_pos 16; keys_in_partition.size() 631711 + building MPHF with 64 threads and 31 partitions... 
+ built mphs[6] for 631711 keys; bits/key = 2.97017 + built positions[6] for 631711 keys; bits/key = 16.0005 +num_bits_for_skew_index 79216192(0.088578 [bits/kmer]) +=== step 4: 'build_skew_index' 1.3006 [sec] (1.45431 [ns/kmer]) +=== total_time 89.5806 [sec] (100.167 [ns/kmer]) +total index size: 1078619646 [B] -- 1078.62 [MB] +SPACE BREAKDOWN: + minimizers: 0.376213 [bits/kmer] (2.65403 [bits/key]) -- 3.89909% + pieces: 0.176908 [bits/kmer] -- 1.83349% + num_super_kmers_before_bucket: 0.261745 [bits/kmer] -- 2.71274% + offsets: 5.64225 [bits/kmer] -- 58.4766% + strings: 3.10303 [bits/kmer] -- 32.16% + skew_index: 0.088578 [bits/kmer] -- 0.918027% + weights: 1.64596e-06 [bits/kmer] -- 1.70588e-05% + weight_interval_values: 2.86254e-07 [bits/kmer] + weight_interval_lengths: 1.07345e-06 [bits/kmer] + weight_dictionary: 2.86254e-07 [bits/kmer] + -------------- + total: 9.64873 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 88.8467% +buckets with 2 super_kmers = 6.6467% +buckets with 3 super_kmers = 1.83072% +buckets with 4 super_kmers = 0.816895% +buckets with 5 super_kmers = 0.474662% +buckets with 6 super_kmers = 0.31934% +buckets with 7 super_kmers = 0.234625% +buckets with 8 super_kmers = 0.179613% +buckets with 9 super_kmers = 0.139152% +buckets with 10 super_kmers = 0.108534% +buckets with 11 super_kmers = 0.0830316% +buckets with 12 super_kmers = 0.0640003% +buckets with 13 super_kmers = 0.048454% +buckets with 14 super_kmers = 0.0367493% +buckets with 15 super_kmers = 0.028166% +buckets with 16 super_kmers = 0.0216187% +max_num_super_kmers_in_bucket 51789 +2025-11-27 22:17:01: saving data structure to disk... +2025-11-27 22:17:01: DONE +k = 31, m = 21, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 15142600 bases, 12142630 kmers +read 200000 sequences, 30291716 bases, 24291746 kmers +read 300000 sequences, 45689080 bases, 36689110 kmers +read 400000 sequences, 61242824 bases, 49242854 kmers +read 500000 sequences, 77204688 bases, 62204718 kmers +read 600000 sequences, 93322728 bases, 75322758 kmers +read 700000 sequences, 109580784 bases, 88580814 kmers +read 800000 sequences, 126019199 bases, 102019229 kmers +read 900000 sequences, 142771255 bases, 115771285 kmers +read 1000000 sequences, 159860280 bases, 129860310 kmers +read 1100000 sequences, 177221889 bases, 144221919 kmers +read 1200000 sequences, 194614623 bases, 158614653 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.0.bin'... +read 1300000 sequences, 212431367 bases, 173431397 kmers +read 1400000 sequences, 230443062 bases, 188443092 kmers +read 1500000 sequences, 248830724 bases, 203830754 kmers +read 1600000 sequences, 267495814 bases, 219495844 kmers +read 1700000 sequences, 286467424 bases, 235467454 kmers +read 1800000 sequences, 305974756 bases, 251974786 kmers +read 1900000 sequences, 325573452 bases, 268573482 kmers +read 2000000 sequences, 345518992 bases, 285519022 kmers +read 2100000 sequences, 365932818 bases, 302932848 kmers +read 2200000 sequences, 386928554 bases, 320928584 kmers +read 2300000 sequences, 408196559 bases, 339196589 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.1.bin'... 
+read 2400000 sequences, 429705966 bases, 357705996 kmers +read 2500000 sequences, 451663903 bases, 376663933 kmers +read 2600000 sequences, 474186039 bases, 396186069 kmers +read 2700000 sequences, 497212102 bases, 416212132 kmers +read 2800000 sequences, 520661914 bases, 436661944 kmers +read 2900000 sequences, 544613770 bases, 457613800 kmers +read 3000000 sequences, 569210361 bases, 479210391 kmers +read 3100000 sequences, 594100200 bases, 501100230 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.2.bin'... +read 3200000 sequences, 619323714 bases, 523323744 kmers +read 3300000 sequences, 645628376 bases, 546628406 kmers +read 3400000 sequences, 672447793 bases, 570447823 kmers +read 3500000 sequences, 699905664 bases, 594905694 kmers +read 3600000 sequences, 728253444 bases, 620253474 kmers +read 3700000 sequences, 756996641 bases, 645996671 kmers +read 3800000 sequences, 786498161 bases, 672498191 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.3.bin'... +read 3900000 sequences, 816910234 bases, 699910264 kmers +read 4000000 sequences, 848332113 bases, 728332143 kmers +read 4100000 sequences, 880941119 bases, 757941149 kmers +read 4200000 sequences, 914418229 bases, 788418259 kmers +read 4300000 sequences, 948701080 bases, 819701110 kmers +read 4400000 sequences, 984340067 bases, 852340097 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.4.bin'... +read 4500000 sequences, 1021325774 bases, 886325804 kmers +read 4600000 sequences, 1059629391 bases, 921629421 kmers +read 4700000 sequences, 1098776183 bases, 957776213 kmers +read 4800000 sequences, 1139816048 bases, 995816078 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.5.bin'... 
+read 4900000 sequences, 1182502543 bases, 1035502573 kmers +read 5000000 sequences, 1226889874 bases, 1076889904 kmers +read 5100000 sequences, 1272830961 bases, 1119830991 kmers +read 5200000 sequences, 1320724320 bases, 1164724350 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.6.bin'... +read 5300000 sequences, 1371130682 bases, 1212130712 kmers +read 5400000 sequences, 1423474687 bases, 1261474717 kmers +read 5500000 sequences, 1478532677 bases, 1313532707 kmers +read 5600000 sequences, 1536511057 bases, 1368511087 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.7.bin'... +read 5700000 sequences, 1597650497 bases, 1426650527 kmers +read 5800000 sequences, 1661331597 bases, 1487331627 kmers +read 5900000 sequences, 1728788299 bases, 1551788329 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.8.bin'... 
+read 6000000 sequences, 1800462732 bases, 1620462762 kmers +read 6100000 sequences, 1855582708 bases, 1672582738 kmers +read 6200000 sequences, 1861290572 bases, 1675290602 kmers +read 6300000 sequences, 1866991959 bases, 1677991989 kmers +read 6400000 sequences, 1872712703 bases, 1680712733 kmers +read 6500000 sequences, 1878412742 bases, 1683412772 kmers +read 6600000 sequences, 1884126738 bases, 1686126768 kmers +read 6700000 sequences, 1889847765 bases, 1688847795 kmers +read 6800000 sequences, 1895565065 bases, 1691565095 kmers +read 6900000 sequences, 1901268580 bases, 1694268610 kmers +read 7000000 sequences, 1906975331 bases, 1696975361 kmers +read 7100000 sequences, 1912694936 bases, 1699694966 kmers +read 7200000 sequences, 1918405386 bases, 1702405416 kmers +read 7300000 sequences, 1924111712 bases, 1705111742 kmers +read 7400000 sequences, 1929830119 bases, 1707830149 kmers +read 7500000 sequences, 1935539974 bases, 1710540004 kmers +read 7600000 sequences, 1941266098 bases, 1713266128 kmers +read 7700000 sequences, 1946976061 bases, 1715976091 kmers +read 7800000 sequences, 1952688102 bases, 1718688132 kmers +read 7900000 sequences, 1958401942 bases, 1721401972 kmers +read 8000000 sequences, 1964117224 bases, 1724117254 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.9.bin'... 
+read 8100000 sequences, 1969830329 bases, 1726830359 kmers +read 8200000 sequences, 1975558497 bases, 1729558527 kmers +read 8300000 sequences, 1981283991 bases, 1732284021 kmers +read 8400000 sequences, 1986995233 bases, 1734995263 kmers +read 8500000 sequences, 1992721377 bases, 1737721407 kmers +read 8600000 sequences, 1998449484 bases, 1740449514 kmers +read 8700000 sequences, 2004184325 bases, 1743184355 kmers +read 8800000 sequences, 2009910285 bases, 1745910315 kmers +read 8900000 sequences, 2015628049 bases, 1748628079 kmers +read 9000000 sequences, 2021346657 bases, 1751346687 kmers +read 9100000 sequences, 2027081520 bases, 1754081550 kmers +read 9200000 sequences, 2032818008 bases, 1756818038 kmers +read 9300000 sequences, 2038553000 bases, 1759553030 kmers +read 9400000 sequences, 2044303164 bases, 1762303194 kmers +read 9500000 sequences, 2050045456 bases, 1765045486 kmers +read 9600000 sequences, 2055782084 bases, 1767782114 kmers +read 9700000 sequences, 2061515459 bases, 1770515489 kmers +read 9800000 sequences, 2067264696 bases, 1773264726 kmers +read 9900000 sequences, 2073029586 bases, 1776029616 kmers +read 10000000 sequences, 2078777071 bases, 1778777101 kmers +read 10100000 sequences, 2084516688 bases, 1781516718 kmers +read 10200000 sequences, 2090254180 bases, 1784254210 kmers +read 10300000 sequences, 2096010791 bases, 1787010821 kmers +read 10400000 sequences, 2101750068 bases, 1789750098 kmers +read 10500000 sequences, 2107490469 bases, 1792490499 kmers +read 10600000 sequences, 2113226774 bases, 1795226804 kmers +read 10700000 sequences, 2118983996 bases, 1797984026 kmers +read 10800000 sequences, 2124753124 bases, 1800753154 kmers +read 10900000 sequences, 2130500309 bases, 1803500339 kmers +read 11000000 sequences, 2136245797 bases, 1806245827 kmers +read 11100000 sequences, 2141998968 bases, 1808998998 kmers +read 11200000 sequences, 2147751449 bases, 1811751479 kmers +read 11300000 sequences, 2153511605 bases, 1814511635 kmers +read 
11400000 sequences, 2159254615 bases, 1817254645 kmers +read 11500000 sequences, 2165018820 bases, 1820018850 kmers +read 11600000 sequences, 2170788091 bases, 1822788121 kmers +read 11700000 sequences, 2176561459 bases, 1825561489 kmers +read 11800000 sequences, 2182327393 bases, 1828327423 kmers +read 11900000 sequences, 2188093393 bases, 1831093423 kmers +read 12000000 sequences, 2193864455 bases, 1833864485 kmers +read 12100000 sequences, 2199639949 bases, 1836639979 kmers +read 12200000 sequences, 2205409553 bases, 1839409583 kmers +read 12300000 sequences, 2211196032 bases, 1842196062 kmers +read 12400000 sequences, 2216980660 bases, 1844980690 kmers +read 12500000 sequences, 2222778653 bases, 1847778683 kmers +read 12600000 sequences, 2228548298 bases, 1850548328 kmers +read 12700000 sequences, 2234333686 bases, 1853333716 kmers +read 12800000 sequences, 2240119243 bases, 1856119273 kmers +read 12900000 sequences, 2245909832 bases, 1858909862 kmers +read 13000000 sequences, 2251713079 bases, 1861713109 kmers +read 13100000 sequences, 2257517156 bases, 1864517186 kmers +read 13200000 sequences, 2263299799 bases, 1867299829 kmers +read 13300000 sequences, 2269090355 bases, 1870090385 kmers +read 13400000 sequences, 2274889056 bases, 1872889086 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.10.bin'... 
+read 13500000 sequences, 2280679358 bases, 1875679388 kmers +read 13600000 sequences, 2286496297 bases, 1878496327 kmers +read 13700000 sequences, 2292303406 bases, 1881303436 kmers +read 13800000 sequences, 2298095843 bases, 1884095873 kmers +read 13900000 sequences, 2303896230 bases, 1886896260 kmers +read 14000000 sequences, 2309685280 bases, 1889685310 kmers +read 14100000 sequences, 2315487789 bases, 1892487819 kmers +read 14200000 sequences, 2321299096 bases, 1895299126 kmers +read 14300000 sequences, 2327098692 bases, 1898098722 kmers +read 14400000 sequences, 2332930649 bases, 1900930679 kmers +read 14500000 sequences, 2338742250 bases, 1903742280 kmers +read 14600000 sequences, 2344551767 bases, 1906551797 kmers +read 14700000 sequences, 2350372494 bases, 1909372524 kmers +read 14800000 sequences, 2356184423 bases, 1912184453 kmers +read 14900000 sequences, 2362012427 bases, 1915012457 kmers +read 15000000 sequences, 2367830829 bases, 1917830859 kmers +read 15100000 sequences, 2373646109 bases, 1920646139 kmers +read 15200000 sequences, 2379487578 bases, 1923487608 kmers +read 15300000 sequences, 2385319549 bases, 1926319579 kmers +read 15400000 sequences, 2391159959 bases, 1929159989 kmers +read 15500000 sequences, 2396996685 bases, 1931996715 kmers +read 15600000 sequences, 2402827442 bases, 1934827472 kmers +read 15700000 sequences, 2408655214 bases, 1937655244 kmers +read 15800000 sequences, 2414491211 bases, 1940491241 kmers +read 15900000 sequences, 2420340361 bases, 1943340391 kmers +read 16000000 sequences, 2426185046 bases, 1946185076 kmers +read 16100000 sequences, 2432032084 bases, 1949032114 kmers +read 16200000 sequences, 2437865309 bases, 1951865339 kmers +read 16300000 sequences, 2443712415 bases, 1954712445 kmers +read 16400000 sequences, 2449571035 bases, 1957571065 kmers +read 16500000 sequences, 2455436767 bases, 1960436797 kmers +read 16600000 sequences, 2461294139 bases, 1963294169 kmers +read 16700000 sequences, 2467151066 bases, 
1966151096 kmers +read 16800000 sequences, 2473014518 bases, 1969014548 kmers +read 16900000 sequences, 2478879148 bases, 1971879178 kmers +read 17000000 sequences, 2484756311 bases, 1974756341 kmers +read 17100000 sequences, 2490639911 bases, 1977639941 kmers +read 17200000 sequences, 2496505090 bases, 1980505120 kmers +read 17300000 sequences, 2502376237 bases, 1983376267 kmers +read 17400000 sequences, 2508235866 bases, 1986235896 kmers +read 17500000 sequences, 2514118929 bases, 1989118959 kmers +read 17600000 sequences, 2520003821 bases, 1992003851 kmers +read 17700000 sequences, 2525879572 bases, 1994879602 kmers +read 17800000 sequences, 2531769727 bases, 1997769757 kmers +read 17900000 sequences, 2537665456 bases, 2000665486 kmers +read 18000000 sequences, 2543560738 bases, 2003560768 kmers +read 18100000 sequences, 2549454266 bases, 2006454296 kmers +read 18200000 sequences, 2555337271 bases, 2009337301 kmers +read 18300000 sequences, 2561224774 bases, 2012224804 kmers +read 18400000 sequences, 2567143057 bases, 2015143087 kmers +read 18500000 sequences, 2573036123 bases, 2018036153 kmers +read 18600000 sequences, 2578923948 bases, 2020923978 kmers +read 18700000 sequences, 2584830711 bases, 2023830741 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.11.bin'... 
+read 18800000 sequences, 2590732809 bases, 2026732839 kmers +read 18900000 sequences, 2596629138 bases, 2029629168 kmers +read 19000000 sequences, 2602544769 bases, 2032544799 kmers +read 19100000 sequences, 2608476616 bases, 2035476646 kmers +read 19200000 sequences, 2614389831 bases, 2038389861 kmers +read 19300000 sequences, 2620312306 bases, 2041312336 kmers +read 19400000 sequences, 2626244668 bases, 2044244698 kmers +read 19500000 sequences, 2632165840 bases, 2047165870 kmers +read 19600000 sequences, 2638096284 bases, 2050096314 kmers +read 19700000 sequences, 2644022948 bases, 2053022978 kmers +read 19800000 sequences, 2649949779 bases, 2055949809 kmers +read 19900000 sequences, 2655887655 bases, 2058887685 kmers +read 20000000 sequences, 2661829270 bases, 2061829300 kmers +read 20100000 sequences, 2667773885 bases, 2064773915 kmers +read 20200000 sequences, 2673731680 bases, 2067731710 kmers +read 20300000 sequences, 2679668638 bases, 2070668668 kmers +read 20400000 sequences, 2685621410 bases, 2073621440 kmers +read 20500000 sequences, 2691583280 bases, 2076583310 kmers +read 20600000 sequences, 2697548031 bases, 2079548061 kmers +read 20700000 sequences, 2703523021 bases, 2082523051 kmers +read 20800000 sequences, 2709482823 bases, 2085482853 kmers +read 20900000 sequences, 2715437358 bases, 2088437388 kmers +read 21000000 sequences, 2721408412 bases, 2091408442 kmers +read 21100000 sequences, 2727378283 bases, 2094378313 kmers +read 21200000 sequences, 2733356793 bases, 2097356823 kmers +read 21300000 sequences, 2739324030 bases, 2100324060 kmers +read 21400000 sequences, 2745310949 bases, 2103310979 kmers +read 21500000 sequences, 2751305483 bases, 2106305513 kmers +read 21600000 sequences, 2757279970 bases, 2109280000 kmers +read 21700000 sequences, 2763250342 bases, 2112250372 kmers +read 21800000 sequences, 2769246031 bases, 2115246061 kmers +read 21900000 sequences, 2775228369 bases, 2118228399 kmers +read 22000000 sequences, 2781228805 bases, 
2121228835 kmers +read 22100000 sequences, 2787227943 bases, 2124227973 kmers +read 22200000 sequences, 2793232303 bases, 2127232333 kmers +read 22300000 sequences, 2799254476 bases, 2130254506 kmers +read 22400000 sequences, 2805268478 bases, 2133268508 kmers +read 22500000 sequences, 2811273807 bases, 2136273837 kmers +read 22600000 sequences, 2817297446 bases, 2139297476 kmers +read 22700000 sequences, 2823311839 bases, 2142311869 kmers +read 22800000 sequences, 2829348673 bases, 2145348703 kmers +read 22900000 sequences, 2835387522 bases, 2148387552 kmers +read 23000000 sequences, 2841415061 bases, 2151415091 kmers +read 23100000 sequences, 2847447355 bases, 2154447385 kmers +read 23200000 sequences, 2853464241 bases, 2157464271 kmers +read 23300000 sequences, 2859504344 bases, 2160504374 kmers +read 23400000 sequences, 2865558640 bases, 2163558670 kmers +read 23500000 sequences, 2871616623 bases, 2166616653 kmers +read 23600000 sequences, 2877666220 bases, 2169666250 kmers +read 23700000 sequences, 2883708997 bases, 2172709027 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.12.bin'... 
+read 23800000 sequences, 2889780064 bases, 2175780094 kmers +read 23900000 sequences, 2895851763 bases, 2178851793 kmers +read 24000000 sequences, 2901936339 bases, 2181936369 kmers +read 24100000 sequences, 2908006001 bases, 2185006031 kmers +read 24200000 sequences, 2914097489 bases, 2188097519 kmers +read 24300000 sequences, 2920158525 bases, 2191158555 kmers +read 24400000 sequences, 2926232288 bases, 2194232318 kmers +read 24500000 sequences, 2932310359 bases, 2197310389 kmers +read 24600000 sequences, 2938383670 bases, 2200383700 kmers +read 24700000 sequences, 2944455547 bases, 2203455577 kmers +read 24800000 sequences, 2950553509 bases, 2206553539 kmers +read 24900000 sequences, 2956625157 bases, 2209625187 kmers +read 25000000 sequences, 2962750710 bases, 2212750740 kmers +read 25100000 sequences, 2968838441 bases, 2215838471 kmers +read 25200000 sequences, 2974964577 bases, 2218964607 kmers +read 25300000 sequences, 2981066348 bases, 2222066378 kmers +read 25400000 sequences, 2987174670 bases, 2225174700 kmers +read 25500000 sequences, 2993287441 bases, 2228287471 kmers +read 25600000 sequences, 2999404181 bases, 2231404211 kmers +read 25700000 sequences, 3005525939 bases, 2234525969 kmers +read 25800000 sequences, 3011641818 bases, 2237641848 kmers +read 25900000 sequences, 3017762680 bases, 2240762710 kmers +read 26000000 sequences, 3023914368 bases, 2243914398 kmers +read 26100000 sequences, 3030074467 bases, 2247074497 kmers +read 26200000 sequences, 3036215358 bases, 2250215388 kmers +read 26300000 sequences, 3042374173 bases, 2253374203 kmers +read 26400000 sequences, 3048520171 bases, 2256520201 kmers +read 26500000 sequences, 3054682791 bases, 2259682821 kmers +read 26600000 sequences, 3060872341 bases, 2262872371 kmers +read 26700000 sequences, 3067031340 bases, 2266031370 kmers +read 26800000 sequences, 3073202220 bases, 2269202250 kmers +read 26900000 sequences, 3079363082 bases, 2272363112 kmers +read 27000000 sequences, 3085556027 bases, 
2275556057 kmers +read 27100000 sequences, 3091751521 bases, 2278751551 kmers +read 27200000 sequences, 3097952572 bases, 2281952602 kmers +read 27300000 sequences, 3104145541 bases, 2285145571 kmers +read 27400000 sequences, 3110358894 bases, 2288358924 kmers +read 27500000 sequences, 3116536976 bases, 2291537006 kmers +read 27600000 sequences, 3122708679 bases, 2294708709 kmers +read 27700000 sequences, 3128900397 bases, 2297900427 kmers +read 27800000 sequences, 3135111542 bases, 2301111572 kmers +read 27900000 sequences, 3141317211 bases, 2304317241 kmers +read 28000000 sequences, 3147523754 bases, 2307523784 kmers +read 28100000 sequences, 3153730407 bases, 2310730437 kmers +read 28200000 sequences, 3159957684 bases, 2313957714 kmers +read 28300000 sequences, 3166181142 bases, 2317181172 kmers +read 28400000 sequences, 3172412131 bases, 2320412161 kmers +read 28500000 sequences, 3178654828 bases, 2323654858 kmers +read 28600000 sequences, 3184886524 bases, 2326886554 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.13.bin'... 
+read 28700000 sequences, 3191117068 bases, 2330117098 kmers +read 28800000 sequences, 3197379540 bases, 2333379570 kmers +read 28900000 sequences, 3203639436 bases, 2336639466 kmers +read 29000000 sequences, 3209891656 bases, 2339891686 kmers +read 29100000 sequences, 3216141216 bases, 2343141246 kmers +read 29200000 sequences, 3222413290 bases, 2346413320 kmers +read 29300000 sequences, 3228708327 bases, 2349708357 kmers +read 29400000 sequences, 3234965760 bases, 2352965790 kmers +read 29500000 sequences, 3241275202 bases, 2356275232 kmers +read 29600000 sequences, 3247560688 bases, 2359560718 kmers +read 29700000 sequences, 3253851429 bases, 2362851459 kmers +read 29800000 sequences, 3260147813 bases, 2366147843 kmers +read 29900000 sequences, 3266443188 bases, 2369443218 kmers +read 30000000 sequences, 3272761129 bases, 2372761159 kmers +read 30100000 sequences, 3279084870 bases, 2376084900 kmers +read 30200000 sequences, 3285396291 bases, 2379396321 kmers +read 30300000 sequences, 3291706637 bases, 2382706667 kmers +read 30400000 sequences, 3298020747 bases, 2386020777 kmers +read 30500000 sequences, 3304364961 bases, 2389364991 kmers +read 30600000 sequences, 3310727391 bases, 2392727421 kmers +read 30700000 sequences, 3317071594 bases, 2396071624 kmers +read 30800000 sequences, 3323415712 bases, 2399415742 kmers +read 30900000 sequences, 3329790954 bases, 2402790984 kmers +read 31000000 sequences, 3336150928 bases, 2406150958 kmers +read 31100000 sequences, 3342541624 bases, 2409541654 kmers +read 31200000 sequences, 3348907387 bases, 2412907417 kmers +read 31300000 sequences, 3355301252 bases, 2416301282 kmers +read 31400000 sequences, 3361677930 bases, 2419677960 kmers +read 31500000 sequences, 3368088580 bases, 2423088610 kmers +read 31600000 sequences, 3374497373 bases, 2426497403 kmers +read 31700000 sequences, 3380954973 bases, 2429955003 kmers +read 31800000 sequences, 3387385456 bases, 2433385486 kmers +read 31900000 sequences, 3393821566 bases, 
2436821596 kmers +read 32000000 sequences, 3400254669 bases, 2440254699 kmers +read 32100000 sequences, 3406661433 bases, 2443661463 kmers +read 32200000 sequences, 3413130559 bases, 2447130589 kmers +read 32300000 sequences, 3419570382 bases, 2450570412 kmers +read 32400000 sequences, 3426014430 bases, 2454014460 kmers +read 32500000 sequences, 3432484446 bases, 2457484476 kmers +read 32600000 sequences, 3438957018 bases, 2460957048 kmers +read 32700000 sequences, 3445449720 bases, 2464449750 kmers +read 32800000 sequences, 3451918191 bases, 2467918221 kmers +read 32900000 sequences, 3458402208 bases, 2471402238 kmers +read 33000000 sequences, 3464886745 bases, 2474886775 kmers +read 33100000 sequences, 3471383068 bases, 2478383098 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.14.bin'... +read 33200000 sequences, 3477878747 bases, 2481878777 kmers +read 33300000 sequences, 3484417152 bases, 2485417182 kmers +read 33400000 sequences, 3490941859 bases, 2488941889 kmers +read 33500000 sequences, 3497445803 bases, 2492445833 kmers +read 33600000 sequences, 3503981393 bases, 2495981423 kmers +read 33700000 sequences, 3510547286 bases, 2499547316 kmers +read 33800000 sequences, 3517100145 bases, 2503100175 kmers +read 33900000 sequences, 3523655251 bases, 2506655281 kmers +read 34000000 sequences, 3530247130 bases, 2510247160 kmers +read 34100000 sequences, 3536826114 bases, 2513826144 kmers +read 34200000 sequences, 3543370719 bases, 2517370749 kmers +read 34300000 sequences, 3549941926 bases, 2520941956 kmers +read 34400000 sequences, 3556525084 bases, 2524525114 kmers +read 34500000 sequences, 3563130081 bases, 2528130111 kmers +read 34600000 sequences, 3569739780 bases, 2531739810 kmers +read 34700000 sequences, 3576369309 bases, 2535369339 kmers +read 34800000 sequences, 3582984603 bases, 2538984633 kmers +read 34900000 sequences, 3589639338 bases, 2542639368 kmers +read 35000000 
sequences, 3596273782 bases, 2546273812 kmers +read 35100000 sequences, 3602915624 bases, 2549915654 kmers +read 35200000 sequences, 3609531465 bases, 2553531495 kmers +read 35300000 sequences, 3616196895 bases, 2557196925 kmers +read 35400000 sequences, 3622877377 bases, 2560877407 kmers +read 35500000 sequences, 3629566990 bases, 2564567020 kmers +read 35600000 sequences, 3636241308 bases, 2568241338 kmers +read 35700000 sequences, 3642914924 bases, 2571914954 kmers +read 35800000 sequences, 3649603351 bases, 2575603381 kmers +read 35900000 sequences, 3656318441 bases, 2579318471 kmers +read 36000000 sequences, 3663044755 bases, 2583044785 kmers +read 36100000 sequences, 3669777945 bases, 2586777975 kmers +read 36200000 sequences, 3676509798 bases, 2590509828 kmers +read 36300000 sequences, 3683277205 bases, 2594277235 kmers +read 36400000 sequences, 3690023259 bases, 2598023289 kmers +read 36500000 sequences, 3696780780 bases, 2601780810 kmers +read 36600000 sequences, 3703553916 bases, 2605553946 kmers +read 36700000 sequences, 3710337571 bases, 2609337601 kmers +read 36800000 sequences, 3717123025 bases, 2613123055 kmers +read 36900000 sequences, 3723910267 bases, 2616910297 kmers +read 37000000 sequences, 3730743453 bases, 2620743483 kmers +read 37100000 sequences, 3737567873 bases, 2624567903 kmers +read 37200000 sequences, 3744378273 bases, 2628378303 kmers +read 37300000 sequences, 3751211086 bases, 2632211116 kmers +read 37400000 sequences, 3758073143 bases, 2636073173 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.15.bin'... 
+read 37500000 sequences, 3764943116 bases, 2639943146 kmers +read 37600000 sequences, 3771815746 bases, 2643815776 kmers +read 37700000 sequences, 3778661925 bases, 2647661955 kmers +read 37800000 sequences, 3785547694 bases, 2651547724 kmers +read 37900000 sequences, 3792403153 bases, 2655403183 kmers +read 38000000 sequences, 3799297812 bases, 2659297842 kmers +read 38100000 sequences, 3806239974 bases, 2663240004 kmers +read 38200000 sequences, 3813157585 bases, 2667157615 kmers +read 38300000 sequences, 3820098418 bases, 2671098448 kmers +read 38400000 sequences, 3827045678 bases, 2675045708 kmers +read 38500000 sequences, 3834035768 bases, 2679035798 kmers +read 38600000 sequences, 3841003319 bases, 2683003349 kmers +read 38700000 sequences, 3848003705 bases, 2687003735 kmers +read 38800000 sequences, 3854998310 bases, 2690998340 kmers +read 38900000 sequences, 3861999344 bases, 2694999374 kmers +read 39000000 sequences, 3869022053 bases, 2699022083 kmers +read 39100000 sequences, 3876075267 bases, 2703075297 kmers +read 39200000 sequences, 3883150321 bases, 2707150351 kmers +read 39300000 sequences, 3890222625 bases, 2711222655 kmers +read 39400000 sequences, 3897268484 bases, 2715268514 kmers +read 39500000 sequences, 3904370112 bases, 2719370142 kmers +read 39600000 sequences, 3911448030 bases, 2723448060 kmers +read 39700000 sequences, 3918568345 bases, 2727568375 kmers +read 39800000 sequences, 3925645444 bases, 2731645474 kmers +read 39900000 sequences, 3932749408 bases, 2735749438 kmers +read 40000000 sequences, 3939899844 bases, 2739899874 kmers +read 40100000 sequences, 3947016310 bases, 2744016340 kmers +read 40200000 sequences, 3954176373 bases, 2748176403 kmers +read 40300000 sequences, 3961389382 bases, 2752389412 kmers +read 40400000 sequences, 3968552071 bases, 2756552101 kmers +read 40500000 sequences, 3975752223 bases, 2760752253 kmers +read 40600000 sequences, 3982970739 bases, 2764970769 kmers +read 40700000 sequences, 3990152709 bases, 
2769152739 kmers +read 40800000 sequences, 3997405356 bases, 2773405386 kmers +read 40900000 sequences, 4004672388 bases, 2777672418 kmers +read 41000000 sequences, 4011944292 bases, 2781944322 kmers +read 41100000 sequences, 4019238344 bases, 2786238374 kmers +read 41200000 sequences, 4026519143 bases, 2790519173 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.16.bin'... +read 41300000 sequences, 4033822566 bases, 2794822596 kmers +read 41400000 sequences, 4041178251 bases, 2799178281 kmers +read 41500000 sequences, 4048493790 bases, 2803493820 kmers +read 41600000 sequences, 4055829038 bases, 2807829068 kmers +read 41700000 sequences, 4063212559 bases, 2812212589 kmers +read 41800000 sequences, 4070629890 bases, 2816629920 kmers +read 41900000 sequences, 4078028063 bases, 2821028093 kmers +read 42000000 sequences, 4085447723 bases, 2825447753 kmers +read 42100000 sequences, 4092897915 bases, 2829897945 kmers +read 42200000 sequences, 4100371858 bases, 2834371888 kmers +read 42300000 sequences, 4107845174 bases, 2838845204 kmers +read 42400000 sequences, 4115310509 bases, 2843310539 kmers +read 42500000 sequences, 4122794181 bases, 2847794211 kmers +read 42600000 sequences, 4130336752 bases, 2852336782 kmers +read 42700000 sequences, 4137838898 bases, 2856838928 kmers +read 42800000 sequences, 4145439502 bases, 2861439532 kmers +read 42900000 sequences, 4153050260 bases, 2866050290 kmers +read 43000000 sequences, 4160667131 bases, 2870667161 kmers +read 43100000 sequences, 4168281208 bases, 2875281238 kmers +read 43200000 sequences, 4175884381 bases, 2879884411 kmers +read 43300000 sequences, 4183510403 bases, 2884510433 kmers +read 43400000 sequences, 4191153977 bases, 2889154007 kmers +read 43500000 sequences, 4198862981 bases, 2893863011 kmers +read 43600000 sequences, 4206635916 bases, 2898635946 kmers +read 43700000 sequences, 4214404635 bases, 2903404665 kmers +read 43800000 
sequences, 4222136044 bases, 2908136074 kmers +read 43900000 sequences, 4229915489 bases, 2912915519 kmers +read 44000000 sequences, 4237696439 bases, 2917696469 kmers +read 44100000 sequences, 4245541347 bases, 2922541377 kmers +read 44200000 sequences, 4253343569 bases, 2927343599 kmers +read 44300000 sequences, 4261209160 bases, 2932209190 kmers +read 44400000 sequences, 4269076502 bases, 2937076532 kmers +read 44500000 sequences, 4276951507 bases, 2941951537 kmers +read 44600000 sequences, 4284863476 bases, 2946863506 kmers +read 44700000 sequences, 4292779395 bases, 2951779425 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.17.bin'... +read 44800000 sequences, 4300761505 bases, 2956761535 kmers +read 44900000 sequences, 4308749134 bases, 2961749164 kmers +read 45000000 sequences, 4316730477 bases, 2966730507 kmers +read 45100000 sequences, 4324743898 bases, 2971743928 kmers +read 45200000 sequences, 4332706339 bases, 2976706369 kmers +read 45300000 sequences, 4340799702 bases, 2981799732 kmers +read 45400000 sequences, 4348938044 bases, 2986938074 kmers +read 45500000 sequences, 4357089148 bases, 2992089178 kmers +read 45600000 sequences, 4365213123 bases, 2997213153 kmers +read 45700000 sequences, 4373409255 bases, 3002409285 kmers +read 45800000 sequences, 4381555955 bases, 3007555985 kmers +read 45900000 sequences, 4389760124 bases, 3012760154 kmers +read 46000000 sequences, 4398064658 bases, 3018064688 kmers +read 46100000 sequences, 4406387022 bases, 3023387052 kmers +read 46200000 sequences, 4414694547 bases, 3028694577 kmers +read 46300000 sequences, 4423058645 bases, 3034058675 kmers +read 46400000 sequences, 4431425456 bases, 3039425486 kmers +read 46500000 sequences, 4439833395 bases, 3044833425 kmers +read 46600000 sequences, 4448259061 bases, 3050259091 kmers +read 46700000 sequences, 4456674986 bases, 3055675016 kmers +read 46800000 sequences, 4465159451 bases, 
3061159481 kmers +read 46900000 sequences, 4473635424 bases, 3066635454 kmers +read 47000000 sequences, 4482251403 bases, 3072251433 kmers +read 47100000 sequences, 4490782771 bases, 3077782801 kmers +read 47200000 sequences, 4499401922 bases, 3083401952 kmers +read 47300000 sequences, 4508109132 bases, 3089109162 kmers +read 47400000 sequences, 4516742321 bases, 3094742351 kmers +read 47500000 sequences, 4525469588 bases, 3100469618 kmers +read 47600000 sequences, 4534242679 bases, 3106242709 kmers +read 47700000 sequences, 4542982876 bases, 3111982906 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.18.bin'... +read 47800000 sequences, 4551866465 bases, 3117866495 kmers +read 47900000 sequences, 4560699422 bases, 3123699452 kmers +read 48000000 sequences, 4569570230 bases, 3129570260 kmers +read 48100000 sequences, 4578501122 bases, 3135501152 kmers +read 48200000 sequences, 4587457020 bases, 3141457050 kmers +read 48300000 sequences, 4596477426 bases, 3147477456 kmers +read 48400000 sequences, 4605472867 bases, 3153472897 kmers +read 48500000 sequences, 4614570314 bases, 3159570344 kmers +read 48600000 sequences, 4623645571 bases, 3165645601 kmers +read 48700000 sequences, 4632844313 bases, 3171844343 kmers +read 48800000 sequences, 4642092013 bases, 3178092043 kmers +read 48900000 sequences, 4651346054 bases, 3184346084 kmers +read 49000000 sequences, 4660631564 bases, 3190631594 kmers +read 49100000 sequences, 4670019434 bases, 3197019464 kmers +read 49200000 sequences, 4679397122 bases, 3203397152 kmers +read 49300000 sequences, 4688892126 bases, 3209892156 kmers +read 49400000 sequences, 4698398402 bases, 3216398432 kmers +read 49500000 sequences, 4707888048 bases, 3222888078 kmers +read 49600000 sequences, 4717466104 bases, 3229466134 kmers +read 49700000 sequences, 4727070074 bases, 3236070104 kmers +read 49800000 sequences, 4736756111 bases, 3242756141 kmers +read 49900000 
sequences, 4746452257 bases, 3249452287 kmers +read 50000000 sequences, 4756246274 bases, 3256246304 kmers +read 50100000 sequences, 4766057201 bases, 3263057231 kmers +read 50200000 sequences, 4775878485 bases, 3269878515 kmers +read 50300000 sequences, 4785727317 bases, 3276727347 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.19.bin'... +read 50400000 sequences, 4795653806 bases, 3283653836 kmers +read 50500000 sequences, 4805712567 bases, 3290712597 kmers +read 50600000 sequences, 4815738486 bases, 3297738516 kmers +read 50700000 sequences, 4825926061 bases, 3304926091 kmers +read 50800000 sequences, 4836224392 bases, 3312224422 kmers +read 50900000 sequences, 4846451475 bases, 3319451505 kmers +read 51000000 sequences, 4856753402 bases, 3326753432 kmers +read 51100000 sequences, 4867266326 bases, 3334266356 kmers +read 51200000 sequences, 4877861799 bases, 3341861829 kmers +read 51300000 sequences, 4888414798 bases, 3349414828 kmers +read 51400000 sequences, 4899113746 bases, 3357113776 kmers +read 51500000 sequences, 4909857519 bases, 3364857549 kmers +read 51600000 sequences, 4920671018 bases, 3372671048 kmers +read 51700000 sequences, 4931456761 bases, 3380456791 kmers +read 51800000 sequences, 4942313146 bases, 3388313176 kmers +read 51900000 sequences, 4953292346 bases, 3396292376 kmers +read 52000000 sequences, 4964398640 bases, 3404398670 kmers +read 52100000 sequences, 4975538457 bases, 3412538487 kmers +read 52200000 sequences, 4986759889 bases, 3420759919 kmers +read 52300000 sequences, 4998126697 bases, 3429126727 kmers +read 52400000 sequences, 5009563077 bases, 3437563107 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.20.bin'... 
+read 52500000 sequences, 5021022583 bases, 3446022613 kmers +read 52600000 sequences, 5032535927 bases, 3454535957 kmers +read 52700000 sequences, 5044218660 bases, 3463218690 kmers +read 52800000 sequences, 5055973619 bases, 3471973649 kmers +read 52900000 sequences, 5067872804 bases, 3480872834 kmers +read 53000000 sequences, 5079791399 bases, 3489791429 kmers +read 53100000 sequences, 5091783497 bases, 3498783527 kmers +read 53200000 sequences, 5103925550 bases, 3507925580 kmers +read 53300000 sequences, 5116240983 bases, 3517241013 kmers +read 53400000 sequences, 5128584923 bases, 3526584953 kmers +read 53500000 sequences, 5140962917 bases, 3535962947 kmers +read 53600000 sequences, 5153500478 bases, 3545500508 kmers +read 53700000 sequences, 5166148888 bases, 3555148918 kmers +read 53800000 sequences, 5178959197 bases, 3564959227 kmers +read 53900000 sequences, 5191971403 bases, 3574971433 kmers +read 54000000 sequences, 5205070227 bases, 3585070257 kmers +read 54100000 sequences, 5218297285 bases, 3595297315 kmers +read 54200000 sequences, 5231683093 bases, 3605683123 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.21.bin'... +read 54300000 sequences, 5245150344 bases, 3616150374 kmers +read 54400000 sequences, 5258739173 bases, 3626739203 kmers +read 54500000 sequences, 5272405047 bases, 3637405077 kmers +read 54600000 sequences, 5286362231 bases, 3648362261 kmers +read 54700000 sequences, 5300436701 bases, 3659436731 kmers +read 54800000 sequences, 5314343049 bases, 3670343079 kmers +read 54900000 sequences, 5328793168 bases, 3681793198 kmers +read 55000000 sequences, 5343494365 bases, 3693494395 kmers +read 55100000 sequences, 5358294825 bases, 3705294855 kmers +read 55200000 sequences, 5373204958 bases, 3717204988 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278222093021240.minimizers.22.bin'... 
+read 55207753 sequences, 5374353539 bases, 3718120949 kmers +num_kmers 3718120949 +num_super_kmers 665719630 +num_pieces 55207754 (+0.890898 [bits/kmer]) +=== step 1: 'parse_file' 193.423 [sec] (52.0218 [ns/kmer]) + == files to merge = 23 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 450000000 +num_written_tuples = 500000000 +num_written_tuples = 550000000 +num_written_tuples = 600000000 +num_written_tuples = 650000000 +num_written_tuples = 665719630 +num_minimizers 548538916 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 50.6416 [sec] (13.6202 [ns/kmer]) +bits_per_offset = ceil(log2(5374353570)) = 33 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278466893518192.bucket_pairs.0.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278466893518192.bucket_pairs.1.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764278466893518192.bucket_pairs.2.bin'... +num_singletons 496363606/548538916 (90.4883%) + == files to merge = 3 +num_written_pairs = 50000000 +num_written_pairs = 52175310 +=== step 3: 'build_index' 176.929 [sec] (47.5855 [ns/kmer]) +max_num_super_kmers_in_bucket 60581 +log2_max_num_super_kmers_in_bucket 16 +num_buckets_in_skew_index 108931/548538916 (0.0198584%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 28942203 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 21098680 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 14673643 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 9194452 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 6106530 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 3939806 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 60581: 4902431 +num_kmers_in_skew_index 88857745 (2.38986%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 28942203 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 28942203 keys; bits/key = 2.82568 + built positions[0] for 28942203 keys; bits/key = 7.00001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 21098680 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 21098680 keys; bits/key = 2.8385 + built positions[1] for 21098680 keys; bits/key = 8.00002 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 14673643 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 14673643 keys; bits/key = 2.95348 + built positions[2] for 14673643 keys; bits/key = 9.00003 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 9194452 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 9194452 keys; bits/key = 2.99486 + built positions[3] for 9194452 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 6106530 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[4] for 6106530 keys; bits/key = 3.14313 + built positions[4] for 6106530 keys; bits/key = 11.0001 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 3939806 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 3939806 keys; bits/key = 3.19856 + built positions[5] for 3939806 keys; bits/key = 12.0001 + lower 4096; upper 60581; num_bits_per_pos 16; keys_in_partition.size() 4902431 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 4902431 keys; bits/key = 3.19579 + built positions[6] for 4902431 keys; bits/key = 16.0001 +num_bits_for_skew_index 1048290016(0.281941 [bits/kmer]) +=== step 4: 'build_skew_index' 12.7137 [sec] (3.4194 [ns/kmer]) +=== total_time 433.707 [sec] (116.647 [ns/kmer]) +total index size: 4592257806 [B] -- 4592.26 [MB] +SPACE BREAKDOWN: + minimizers: 0.3898 [bits/kmer] (2.64215 [bits/key]) -- 3.94501% + pieces: 0.14758 [bits/kmer] -- 1.4936% + num_super_kmers_before_bucket: 0.262034 [bits/kmer] -- 2.65195% + offsets: 5.90856 [bits/kmer] -- 59.7983% + strings: 2.8909 [bits/kmer] -- 29.2577% + skew_index: 0.281941 [bits/kmer] -- 2.85342% + weights: 3.95899e-07 [bits/kmer] -- 4.00674e-06% + weight_interval_values: 6.8852e-08 [bits/kmer] + weight_interval_lengths: 2.58195e-07 [bits/kmer] + weight_dictionary: 6.8852e-08 [bits/kmer] + -------------- + total: 9.88081 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 90.4883% +buckets with 2 super_kmers = 7.34098% +buckets with 3 super_kmers = 1.04053% +buckets with 4 super_kmers = 0.357644% +buckets with 5 super_kmers = 0.190087% +buckets with 6 super_kmers = 0.119244% +buckets with 7 super_kmers = 0.0815855% +buckets with 8 super_kmers = 0.0591728% +buckets with 9 super_kmers = 0.0447432% +buckets with 10 super_kmers = 0.0349151% +buckets with 11 super_kmers = 0.0279165% +buckets with 12 super_kmers = 0.0227947% +buckets with 13 super_kmers = 0.0188326% +buckets with 14 super_kmers = 0.0160384% 
+buckets with 15 super_kmers = 0.0135885% +buckets with 16 super_kmers = 0.0116692% +max_num_super_kmers_in_bucket 60581 +2025-11-27 22:24:17: saving data structure to disk... +2025-11-27 22:24:19: DONE diff --git a/benchmarks/results-27-11-25-v3/k31/regular-build.time.log b/benchmarks/results-27-11-25-v3/k31/regular-build.time.log new file mode 100644 index 0000000..297facb --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/regular-build.time.log @@ -0,0 +1,138 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k31.eulertigs.fa.gz -k 31 -m 20 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/cod.k31.sshash" + User time (seconds): 56.43 + System time (seconds): 4.91 + Percent of CPU this job got: 140% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:43.57 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3267596 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1285906 + Voluntary context switches: 453 + Involuntary context switches: 532 + Swaps: 0 + File system inputs: 345544 + File system outputs: 8865216 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k31.eulertigs.fa.gz -k 31 -m 20 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/kestrel.k31.sshash" + User time (seconds): 141.00 + System time (seconds): 10.78 + Percent of CPU this job got: 137% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:50.63 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 
7473836 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2580204 + Voluntary context switches: 317 + Involuntary context switches: 977 + Swaps: 0 + File system inputs: 667848 + File system outputs: 20151280 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/human.k31.sshash" + User time (seconds): 400.27 + System time (seconds): 25.66 + Percent of CPU this job got: 157% + Elapsed (wall clock) time (h:mm:ss or m:ss): 4:30.11 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 13548460 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 5451900 + Voluntary context switches: 1378 + Involuntary context switches: 2990 + Swaps: 0 + File system inputs: 1725176 + File system outputs: 47635376 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k31.eulertigs.fa.gz -k 31 -m 19 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/ncbi-virus.k31.sshash" + User time (seconds): 37.45 + System time (seconds): 3.87 + Percent of CPU this job got: 123% + Elapsed (wall clock) time (h:mm:ss or m:ss): 0:33.36 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 2300480 + Average resident set size (kbytes): 0 + Major 
(requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 963934 + Voluntary context switches: 677 + Involuntary context switches: 343 + Swaps: 0 + File system inputs: 265424 + File system outputs: 6252040 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/se.k31.sshash" + User time (seconds): 113.14 + System time (seconds): 9.40 + Percent of CPU this job got: 135% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:30.50 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6236468 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2257001 + Voluntary context switches: 496 + Involuntary context switches: 981 + Swaps: 0 + File system inputs: 996864 + File system outputs: 17778952 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k31.eulertigs.fa.gz -k 31 -m 21 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/hprc.k31.sshash" + User time (seconds): 727.79 + System time (seconds): 37.81 + Percent of CPU this job got: 174% + Elapsed (wall clock) time (h:mm:ss or m:ss): 7:17.52 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 19294572 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 7854638 + 
Voluntary context switches: 1355 + Involuntary context switches: 4402 + Swaps: 0 + File system inputs: 3670336 + File system outputs: 75085936 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-27-11-25-v3/k31/regular-streaming-queries-high-hit.log b/benchmarks/results-27-11-25-v3/k31/regular-streaming-queries-high-hit.log new file mode 100644 index 0000000..5c3ef62 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k31/regular-streaming-queries-high-hit.log @@ -0,0 +1,48 @@ +2025-11-28 15:11:25: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-28 15:11:48: DONE +==== query report: +num_kmers = 163287360 +num_positive_kmers = 132860997 (81.3664%) +num_searches = 26003321/132860997 (19.5718%) +num_extensions = 106857676/132860997 (80.4282%) +elapsed = 22893.3 millisec / 22.8933 sec / 0.381555 min / 140.203 ns/kmer +2025-11-28 15:11:49: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-28 15:14:35: DONE +==== query report: +num_kmers = 695737535 +num_positive_kmers = 525542891 (75.5375%) +num_searches = 91281351/525542891 (17.369%) +num_extensions = 434261540/525542891 (82.631%) +elapsed = 166627 millisec / 166.627 sec / 2.77712 min / 239.497 ns/kmer +2025-11-28 15:14:36: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 15:23:26: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1437949378 (91.5906%) +num_searches = 323160973/1437949378 (22.4737%) +num_extensions = 1114788405/1437949378 (77.5263%) +elapsed = 529247 millisec / 529.247 sec / 8.82078 min / 337.105 ns/kmer +2025-11-28 15:23:26: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... 
+2025-11-28 15:23:27: DONE +==== query report: +num_kmers = 14092875 +num_positive_kmers = 13983775 (99.2258%) +num_searches = 2503900/13983775 (17.9058%) +num_extensions = 11479875/13983775 (82.0942%) +elapsed = 1565.86 millisec / 1.56586 sec / 0.0260976 min / 111.11 ns/kmer +2025-11-28 15:23:28: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-28 15:31:05: DONE +==== query report: +num_kmers = 789838196 +num_positive_kmers = 764882549 (96.8404%) +num_searches = 309959354/764882549 (40.5238%) +num_extensions = 454923195/764882549 (59.4762%) +elapsed = 457145 millisec / 457.145 sec / 7.61908 min / 578.783 ns/kmer +2025-11-28 15:31:07: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 15:41:55: DONE +==== query report: +num_kmers = 1569974986 +num_positive_kmers = 1485223278 (94.6017%) +num_searches = 360774451/1485223278 (24.2909%) +num_extensions = 1124448827/1485223278 (75.7091%) +elapsed = 648068 millisec / 648.068 sec / 10.8011 min / 412.789 ns/kmer diff --git a/benchmarks/results-27-11-25-v3/k63/canon-bench.log b/benchmarks/results-27-11-25-v3/k63/canon-bench.log new file mode 100644 index 0000000..369986f --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/canon-bench.log @@ -0,0 +1,108 @@ +avg_nanosec_per_positive_lookup 876.394 +avg_nanosec_per_negative_lookup 799.672 +avg_nanosec_per_positive_lookup_advanced 884.564 +avg_nanosec_per_negative_lookup_advanced 780.401 +avg_nanosec_per_access 309.258 +iterator: avg_nanosec_per_kmer 13.8343 +avg_nanosec_per_positive_lookup 884.522 +avg_nanosec_per_negative_lookup 791.339 +avg_nanosec_per_positive_lookup_advanced 880.852 +avg_nanosec_per_negative_lookup_advanced 781.203 +avg_nanosec_per_access 306.341 +iterator: avg_nanosec_per_kmer 13.9389 +avg_nanosec_per_positive_lookup 884.412 +avg_nanosec_per_negative_lookup 775.545 +avg_nanosec_per_positive_lookup_advanced 885.548 +avg_nanosec_per_negative_lookup_advanced 786.303 
+avg_nanosec_per_access 311.145 +iterator: avg_nanosec_per_kmer 13.9717 +avg_nanosec_per_positive_lookup 790.669 +avg_nanosec_per_negative_lookup 831.062 +avg_nanosec_per_positive_lookup_advanced 775.663 +avg_nanosec_per_negative_lookup_advanced 822.557 +avg_nanosec_per_access 268.456 +iterator: avg_nanosec_per_kmer 13.8201 +avg_nanosec_per_positive_lookup 788.897 +avg_nanosec_per_negative_lookup 837.625 +avg_nanosec_per_positive_lookup_advanced 795.719 +avg_nanosec_per_negative_lookup_advanced 841.535 +avg_nanosec_per_access 268.574 +iterator: avg_nanosec_per_kmer 13.7701 +avg_nanosec_per_positive_lookup 790.506 +avg_nanosec_per_negative_lookup 822.841 +avg_nanosec_per_positive_lookup_advanced 776.425 +avg_nanosec_per_negative_lookup_advanced 820.502 +avg_nanosec_per_access 271.404 +iterator: avg_nanosec_per_kmer 14.1039 +avg_nanosec_per_positive_lookup 1122.58 +avg_nanosec_per_negative_lookup 992.934 +avg_nanosec_per_positive_lookup_advanced 1106.31 +avg_nanosec_per_negative_lookup_advanced 981.115 +avg_nanosec_per_access 355.232 +iterator: avg_nanosec_per_kmer 13.7348 +avg_nanosec_per_positive_lookup 1124.01 +avg_nanosec_per_negative_lookup 985.904 +avg_nanosec_per_positive_lookup_advanced 1125.37 +avg_nanosec_per_negative_lookup_advanced 984.882 +avg_nanosec_per_access 352.495 +iterator: avg_nanosec_per_kmer 13.7978 +avg_nanosec_per_positive_lookup 1103.27 +avg_nanosec_per_negative_lookup 983.145 +avg_nanosec_per_positive_lookup_advanced 1108.69 +avg_nanosec_per_negative_lookup_advanced 982.661 +avg_nanosec_per_access 352.654 +iterator: avg_nanosec_per_kmer 14.1428 +avg_nanosec_per_positive_lookup 740.434 +avg_nanosec_per_negative_lookup 771.135 +avg_nanosec_per_positive_lookup_advanced 746.781 +avg_nanosec_per_negative_lookup_advanced 766.275 +avg_nanosec_per_access 299.402 +iterator: avg_nanosec_per_kmer 13.7976 +avg_nanosec_per_positive_lookup 742.4 +avg_nanosec_per_negative_lookup 769.248 +avg_nanosec_per_positive_lookup_advanced 745.91 
+avg_nanosec_per_negative_lookup_advanced 761.47 +avg_nanosec_per_access 300.413 +iterator: avg_nanosec_per_kmer 13.9399 +avg_nanosec_per_positive_lookup 754.041 +avg_nanosec_per_negative_lookup 772.095 +avg_nanosec_per_positive_lookup_advanced 761.422 +avg_nanosec_per_negative_lookup_advanced 777.676 +avg_nanosec_per_access 301.703 +iterator: avg_nanosec_per_kmer 13.9785 +avg_nanosec_per_positive_lookup 1852.7 +avg_nanosec_per_negative_lookup 1178.69 +avg_nanosec_per_positive_lookup_advanced 1861.07 +avg_nanosec_per_negative_lookup_advanced 1181.97 +avg_nanosec_per_access 435.589 +iterator: avg_nanosec_per_kmer 14.2016 +avg_nanosec_per_positive_lookup 1862.48 +avg_nanosec_per_negative_lookup 1173.81 +avg_nanosec_per_positive_lookup_advanced 1855.42 +avg_nanosec_per_negative_lookup_advanced 1170.37 +avg_nanosec_per_access 448.032 +iterator: avg_nanosec_per_kmer 14.2286 +avg_nanosec_per_positive_lookup 1876.48 +avg_nanosec_per_negative_lookup 1178.68 +avg_nanosec_per_positive_lookup_advanced 1858.57 +avg_nanosec_per_negative_lookup_advanced 1180.2 +avg_nanosec_per_access 437.978 +iterator: avg_nanosec_per_kmer 14.1341 +avg_nanosec_per_positive_lookup 1816.43 +avg_nanosec_per_negative_lookup 1491.33 +avg_nanosec_per_positive_lookup_advanced 1810.89 +avg_nanosec_per_negative_lookup_advanced 1476.5 +avg_nanosec_per_access 683.845 +iterator: avg_nanosec_per_kmer 14.2063 +avg_nanosec_per_positive_lookup 1832.36 +avg_nanosec_per_negative_lookup 1494.86 +avg_nanosec_per_positive_lookup_advanced 1823.53 +avg_nanosec_per_negative_lookup_advanced 1486.1 +avg_nanosec_per_access 679.221 +iterator: avg_nanosec_per_kmer 14.1995 +avg_nanosec_per_positive_lookup 1808.8 +avg_nanosec_per_negative_lookup 1489.24 +avg_nanosec_per_positive_lookup_advanced 1805.45 +avg_nanosec_per_negative_lookup_advanced 1503.49 +avg_nanosec_per_access 679.005 +iterator: avg_nanosec_per_kmer 14.1574 diff --git a/benchmarks/results-27-11-25-v3/k63/canon-build.log 
b/benchmarks/results-27-11-25-v3/k63/canon-build.log new file mode 100644 index 0000000..02ded11 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/canon-build.log @@ -0,0 +1,1465 @@ +k = 63, m = 24, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 61951098 bases, 55751160 kmers +read 200000 sequences, 122972836 bases, 110572898 kmers +read 300000 sequences, 183599791 bases, 164999853 kmers +read 400000 sequences, 245140055 bases, 220340117 kmers +read 500000 sequences, 306871655 bases, 275871717 kmers +read 600000 sequences, 368254748 bases, 331054810 kmers +read 700000 sequences, 430272349 bases, 386872411 kmers +read 800000 sequences, 496894402 bases, 447294464 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764281980787087597.minimizers.0.bin'... +read 900000 sequences, 580548812 bases, 524748874 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764281980787087597.minimizers.1.bin'... +read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +num_super_kmers 35999202 +num_pieces 954556 (+0.212663 [bits/kmer]) +=== step 1: 'parse_file' 134.198 [sec] (241.109 [ns/kmer]) + == files to merge = 2 +num_written_tuples = 35999202 +num_minimizers 29634633 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 2.01297 [sec] (3.61665 [ns/kmer]) +bits_per_offset = ceil(log2(615768131)) = 30 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282117040432330.bucket_pairs.0.bin'... 
+num_singletons 26933173/29634633 (90.8841%) +=== step 3: 'build_index' 4.44664 [sec] (7.98914 [ns/kmer]) +max_num_super_kmers_in_bucket 25891 +log2_max_num_super_kmers_in_bucket 15 +num_buckets_in_skew_index 5750/29634633 (0.019403%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 5076676 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 3464326 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 2275166 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 1228212 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 944675 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 1172379 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 25891: 2762357 +num_kmers_in_skew_index 16923791 (3.04064%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 5076676 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 5076676 keys; bits/key = 3.18432 + built positions[0] for 5076676 keys; bits/key = 7.00007 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 3464326 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 3464326 keys; bits/key = 3.27072 + built positions[1] for 3464326 keys; bits/key = 8.0001 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 2275166 + building MPHF with 64 threads and 113 partitions... + built mphs[2] for 2275166 keys; bits/key = 3.14325 + built positions[2] for 2275166 keys; bits/key = 9.00016 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 1228212 + building MPHF with 64 threads and 61 partitions... 
+ built mphs[3] for 1228212 keys; bits/key = 3.02254 + built positions[3] for 1228212 keys; bits/key = 10.0003 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 944675 + building MPHF with 64 threads and 47 partitions... + built mphs[4] for 944675 keys; bits/key = 2.99567 + built positions[4] for 944675 keys; bits/key = 11.0004 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 1172379 + building MPHF with 64 threads and 58 partitions... + built mphs[5] for 1172379 keys; bits/key = 3.01185 + built positions[5] for 1172379 keys; bits/key = 12.0003 + lower 4096; upper 25891; num_bits_per_pos 15; keys_in_partition.size() 2762357 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 2762357 keys; bits/key = 3.38579 + built positions[6] for 2762357 keys; bits/key = 15.0001 +num_bits_for_skew_index 215982112(0.388048 [bits/kmer]) +=== step 4: 'build_skew_index' 1.60188 [sec] (2.87805 [ns/kmer]) +=== total_time 142.259 [sec] (255.592 [ns/kmer]) +total index size: 334461174 [B] -- 334.461 [MB] +SPACE BREAKDOWN: + minimizers: 0.15013 [bits/kmer] (2.81968 [bits/key]) -- 3.12294% + pieces: 0.0214933 [bits/kmer] -- 0.447094% + num_super_kmers_before_bucket: 0.0946302 [bits/kmer] -- 1.96846% + offsets: 1.94036 [bits/kmer] -- 40.3625% + strings: 2.21266 [bits/kmer] -- 46.0269% + skew_index: 0.388048 [bits/kmer] -- 8.07202% + weights: 2.6447e-06 [bits/kmer] -- 5.50139e-05% + weight_interval_values: 4.59947e-07 [bits/kmer] + weight_interval_lengths: 1.7248e-06 [bits/kmer] + weight_dictionary: 4.59947e-07 [bits/kmer] + -------------- + total: 4.80733 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 90.8841% +buckets with 2 super_kmers = 6.99754% +buckets with 3 super_kmers = 0.925178% +buckets with 4 super_kmers = 0.38293% +buckets with 5 super_kmers = 0.209022% +buckets with 6 super_kmers = 0.128991% +buckets with 7 super_kmers = 0.086787% +buckets with 8 super_kmers = 0.0613336% +buckets 
with 9 super_kmers = 0.0456999% +buckets with 10 super_kmers = 0.0354855% +buckets with 11 super_kmers = 0.0268402% +buckets with 12 super_kmers = 0.0229158% +buckets with 13 super_kmers = 0.0188664% +buckets with 14 super_kmers = 0.0154178% +buckets with 15 super_kmers = 0.0131974% +buckets with 16 super_kmers = 0.0112638% +max_num_super_kmers_in_bucket 25891 +2025-11-27 23:22:03: saving data structure to disk... +2025-11-27 23:22:03: DONE +k = 63, m = 24, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282123366699764.minimizers.0.bin'... +read 100000 sequences, 726199521 bases, 719999583 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282123366699764.minimizers.1.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282123366699764.minimizers.2.bin'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +num_super_kmers 73789964 +num_pieces 155785 (+0.0167213 [bits/kmer]) +=== step 1: 'parse_file' 276.721 [sec] (239.534 [ns/kmer]) + == files to merge = 3 +num_written_tuples = 50000000 +num_written_tuples = 73789964 +num_minimizers 69116186 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 4.65197 [sec] (4.0268 [ns/kmer]) +bits_per_offset = ceil(log2(1164909338)) = 31 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282404822659993.bucket_pairs.0.bin'... 
+num_singletons 65057767/69116186 (94.1281%) +=== step 3: 'build_index' 12.7972 [sec] (11.0775 [ns/kmer]) +max_num_super_kmers_in_bucket 1658 +log2_max_num_super_kmers_in_bucket 11 +num_buckets_in_skew_index 814/69116186 (0.00117773%) +num_partitions 5 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 745218 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 585708 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 406763 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 180974 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 1658: 80215 +num_kmers_in_skew_index 1998878 (0.173025%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 745218 + building MPHF with 64 threads and 37 partitions... + built mphs[0] for 745218 keys; bits/key = 2.97376 + built positions[0] for 745218 keys; bits/key = 7.0005 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 585708 + building MPHF with 64 threads and 29 partitions... + built mphs[1] for 585708 keys; bits/key = 2.96953 + built positions[1] for 585708 keys; bits/key = 8.0006 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 406763 + building MPHF with 64 threads and 20 partitions... + built mphs[2] for 406763 keys; bits/key = 2.94572 + built positions[2] for 406763 keys; bits/key = 9.00094 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 180974 + building MPHF with 64 threads and 9 partitions... + built mphs[3] for 180974 keys; bits/key = 2.92895 + built positions[3] for 180974 keys; bits/key = 10.0021 + lower 1024; upper 1658; num_bits_per_pos 11; keys_in_partition.size() 80215 + building MPHF with 64 threads and 4 partitions... 
+ built mphs[4] for 80215 keys; bits/key = 2.93252 + built positions[4] for 80215 keys; bits/key = 11.004 +num_bits_for_skew_index 22176032(0.0191959 [bits/kmer]) +=== step 4: 'build_skew_index' 0.4794 [sec] (0.414975 [ns/kmer]) +=== total_time 294.65 [sec] (255.053 [ns/kmer]) +total index size: 617733492 [B] -- 617.733 [MB] +SPACE BREAKDOWN: + minimizers: 0.162004 [bits/kmer] (2.70783 [bits/key]) -- 3.78713% + pieces: 0.00221454 [bits/kmer] -- 0.0517689% + num_super_kmers_before_bucket: 0.0975277 [bits/kmer] -- 2.27989% + offsets: 1.98008 [bits/kmer] -- 46.2879% + strings: 2.01672 [bits/kmer] -- 47.1445% + skew_index: 0.0191959 [bits/kmer] -- 0.448738% + weights: 1.27418e-06 [bits/kmer] -- 2.97863e-05% + weight_interval_values: 2.21597e-07 [bits/kmer] + weight_interval_lengths: 8.30988e-07 [bits/kmer] + weight_dictionary: 2.21597e-07 [bits/kmer] + -------------- + total: 4.27775 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 94.1281% +buckets with 2 super_kmers = 5.60305% +buckets with 3 super_kmers = 0.178858% +buckets with 4 super_kmers = 0.0313704% +buckets with 5 super_kmers = 0.0156071% +buckets with 6 super_kmers = 0.00951007% +buckets with 7 super_kmers = 0.00636899% +buckets with 8 super_kmers = 0.00436801% +buckets with 9 super_kmers = 0.00342467% +buckets with 10 super_kmers = 0.00260286% +buckets with 11 super_kmers = 0.00198362% +buckets with 12 super_kmers = 0.00156548% +buckets with 13 super_kmers = 0.00138463% +buckets with 14 super_kmers = 0.00108079% +buckets with 15 super_kmers = 0.000963595% +buckets with 16 super_kmers = 0.000807336% +max_num_super_kmers_in_bucket 1658 +2025-11-27 23:26:58: saving data structure to disk... +2025-11-27 23:26:58: DONE +k = 63, m = 25, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 156072595 bases, 149872657 kmers +read 200000 sequences, 358422338 bases, 346022400 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.0.bin'... +read 300000 sequences, 483978517 bases, 465378579 kmers +read 400000 sequences, 579661118 bases, 554861180 kmers +read 500000 sequences, 676694662 bases, 645694724 kmers +read 600000 sequences, 771042496 bases, 733842558 kmers +read 700000 sequences, 867361949 bases, 823962011 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.1.bin'... +read 800000 sequences, 963947999 bases, 914348061 kmers +read 900000 sequences, 1061593876 bases, 1005793938 kmers +read 1000000 sequences, 1159508767 bases, 1097508829 kmers +read 1100000 sequences, 1258471359 bases, 1190271421 kmers +read 1200000 sequences, 1356700107 bases, 1282300169 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.2.bin'... +read 1300000 sequences, 1455266386 bases, 1374666448 kmers +read 1400000 sequences, 1554765716 bases, 1467965778 kmers +read 1500000 sequences, 1655325517 bases, 1562325579 kmers +read 1600000 sequences, 1756677712 bases, 1657477774 kmers +read 1700000 sequences, 1857972502 bases, 1752572564 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.3.bin'... +read 1800000 sequences, 1959725889 bases, 1848125951 kmers +read 1900000 sequences, 2064608705 bases, 1946808767 kmers +read 2000000 sequences, 2171598469 bases, 2047598531 kmers +read 2100000 sequences, 2280349838 bases, 2150149900 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.4.bin'... 
+read 2200000 sequences, 2390837388 bases, 2254437450 kmers +read 2300000 sequences, 2504101994 bases, 2361502056 kmers +read 2400000 sequences, 2621983258 bases, 2473183320 kmers +read 2500000 sequences, 2745887962 bases, 2590888024 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.5.bin'... +read 2600000 sequences, 2875578557 bases, 2714378619 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764282418542609190.minimizers.6.bin'... +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +num_super_kmers 183049284 +num_pieces 2642918 (+0.118255 [bits/kmer]) +=== step 1: 'parse_file' 654.74 [sec] (236.256 [ns/kmer]) + == files to merge = 7 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 183049284 +num_minimizers 150368801 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 11.44 [sec] (4.12799 [ns/kmer]) +bits_per_offset = ceil(log2(2935177010)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283084742078429.bucket_pairs.0.bin'... +num_singletons 138365468/150368801 (92.0174%) +=== step 3: 'build_index' 37.4042 [sec] (13.4969 [ns/kmer]) +max_num_super_kmers_in_bucket 80956 +log2_max_num_super_kmers_in_bucket 17 +num_buckets_in_skew_index 38821/150368801 (0.0258172%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 27564246 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 25356364 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 22733436 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 19935611 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 18290988 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 15920993 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 80956: 34692171 +num_kmers_in_skew_index 164493809 (5.93558%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 27564246 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 27564246 keys; bits/key = 2.77233 + built positions[0] for 27564246 keys; bits/key = 7.00001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 25356364 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 25356364 keys; bits/key = 2.7863 + built positions[1] for 25356364 keys; bits/key = 8.00001 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 22733436 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 22733436 keys; bits/key = 2.81617 + built positions[2] for 22733436 keys; bits/key = 9.00002 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 19935611 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 19935611 keys; bits/key = 2.85625 + built positions[3] for 19935611 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 18290988 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[4] for 18290988 keys; bits/key = 2.88201 + built positions[4] for 18290988 keys; bits/key = 11 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 15920993 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 15920993 keys; bits/key = 2.92135 + built positions[5] for 15920993 keys; bits/key = 12 + lower 4096; upper 80956; num_bits_per_pos 17; keys_in_partition.size() 34692171 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 34692171 keys; bits/key = 2.80829 + built positions[6] for 34692171 keys; bits/key = 17 +num_bits_for_skew_index 2246461168(0.810612 [bits/kmer]) +=== step 4: 'build_skew_index' 21.0849 [sec] (7.60825 [ns/kmer]) +=== total_time 724.669 [sec] (261.489 [ns/kmer]) +total index size: 1834574722 [B] -- 1834.57 [MB] +SPACE BREAKDOWN: + minimizers: 0.14412 [bits/kmer] (2.65614 [bits/key]) -- 2.72135% + pieces: 0.0126433 [bits/kmer] -- 0.238738% + num_super_kmers_before_bucket: 0.0966198 [bits/kmer] -- 1.82443% + offsets: 2.11364 [bits/kmer] -- 39.911% + strings: 2.11825 [bits/kmer] -- 39.9981% + skew_index: 0.810612 [bits/kmer] -- 15.3064% + weights: 5.31156e-07 [bits/kmer] -- 1.00296e-05% + weight_interval_values: 9.23749e-08 [bits/kmer] + weight_interval_lengths: 3.46406e-07 [bits/kmer] + weight_dictionary: 9.23749e-08 [bits/kmer] + -------------- + total: 5.29589 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 92.0174% +buckets with 2 super_kmers = 6.57404% +buckets with 3 super_kmers = 0.622859% +buckets with 4 super_kmers = 0.231803% +buckets with 5 super_kmers = 0.1262% +buckets with 6 super_kmers = 0.080006% +buckets with 7 super_kmers = 0.0549768% +buckets with 8 super_kmers = 0.0401785% +buckets with 9 super_kmers = 0.0304717% +buckets with 10 super_kmers = 0.0240861% +buckets with 11 super_kmers = 0.0194163% +buckets with 12 super_kmers = 0.0160598% +buckets with 13 super_kmers = 0.0135819% +buckets with 14 super_kmers = 0.0115616% 
+buckets with 15 super_kmers = 0.00982252% +buckets with 16 super_kmers = 0.0086913% +max_num_super_kmers_in_bucket 80956 +2025-11-27 23:39:03: saving data structure to disk... +2025-11-27 23:39:04: DONE +k = 63, m = 23, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 48527990 bases, 42328052 kmers +read 200000 sequences, 101938174 bases, 89538236 kmers +read 300000 sequences, 171738001 bases, 153138063 kmers +read 400000 sequences, 279668649 bases, 254868711 kmers +read 500000 sequences, 322024377 bases, 291024439 kmers +read 600000 sequences, 367040069 bases, 329840131 kmers +read 700000 sequences, 412232472 bases, 368832534 kmers +read 800000 sequences, 457488794 bases, 407888856 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283144662988822.minimizers.0.bin'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +num_super_kmers 26466010 +num_pieces 800232 (+0.240545 [bits/kmer]) +=== step 1: 'parse_file' 100.813 [sec] (244.385 [ns/kmer]) +num_minimizers 22039367 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 0.826242 [sec] (2.00293 [ns/kmer]) +bits_per_offset = ceil(log2(462130265)) = 29 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283246332743683.bucket_pairs.0.bin'... +num_singletons 19162998/22039367 (86.9489%) +=== step 3: 'build_index' 3.17876 [sec] (7.7058 [ns/kmer]) +max_num_super_kmers_in_bucket 722 +log2_max_num_super_kmers_in_bucket 10 +num_buckets_in_skew_index 29/22039367 (0.000131583%) +num_partitions 4 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 36447 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 6972 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 6633 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 722: 8845 +num_kmers_in_skew_index 58897 (0.0142775%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 36447 + building MPHF with 64 threads and 1 partitions... + built mphs[0] for 36447 keys; bits/key = 2.76083 + built positions[0] for 36447 keys; bits/key = 7.00985 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 6972 + building MPHF with 64 threads and 1 partitions... + built mphs[1] for 6972 keys; bits/key = 3.27252 + built positions[1] for 6972 keys; bits/key = 8.05049 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 6633 + building MPHF with 64 threads and 1 partitions... + built mphs[2] for 6633 keys; bits/key = 3.39153 + built positions[2] for 6633 keys; bits/key = 9.05051 + lower 512; upper 722; num_bits_per_pos 10; keys_in_partition.size() 8845 + building MPHF with 64 threads and 1 partitions... 
+ built mphs[3] for 8845 keys; bits/key = 3.34652 + built positions[3] for 8845 keys; bits/key = 10.0432 +num_bits_for_skew_index 636208(0.00154226 [bits/kmer]) +=== step 4: 'build_skew_index' 0.154066 [sec] (0.373479 [ns/kmer]) +=== total_time 104.972 [sec] (254.467 [ns/kmer]) +total index size: 225173244 [B] -- 225.173 [MB] +SPACE BREAKDOWN: + minimizers: 0.146042 [bits/kmer] (2.7335 [bits/key]) -- 3.34435% + pieces: 0.0239116 [bits/kmer] -- 0.547574% + num_super_kmers_before_bucket: 0.0942127 [bits/kmer] -- 2.15746% + offsets: 1.86057 [bits/kmer] -- 42.6069% + strings: 2.24055 [bits/kmer] -- 51.3083% + skew_index: 0.00154226 [bits/kmer] -- 0.0353177% + weights: 3.56835e-06 [bits/kmer] -- 8.17149e-05% + weight_interval_values: 6.20582e-07 [bits/kmer] + weight_interval_lengths: 2.32718e-06 [bits/kmer] + weight_dictionary: 6.20582e-07 [bits/kmer] + -------------- + total: 4.36683 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 86.9489% +buckets with 2 super_kmers = 9.80079% +buckets with 3 super_kmers = 1.82954% +buckets with 4 super_kmers = 0.666848% +buckets with 5 super_kmers = 0.30254% +buckets with 6 super_kmers = 0.159315% +buckets with 7 super_kmers = 0.0922894% +buckets with 8 super_kmers = 0.0577467% +buckets with 9 super_kmers = 0.0374829% +buckets with 10 super_kmers = 0.0254681% +buckets with 11 super_kmers = 0.0181584% +buckets with 12 super_kmers = 0.0127091% +buckets with 13 super_kmers = 0.00979157% +buckets with 14 super_kmers = 0.00777699% +buckets with 15 super_kmers = 0.00571704% +buckets with 16 super_kmers = 0.00470975% +max_num_super_kmers_in_bucket 722 +2025-11-27 23:40:49: saving data structure to disk... +2025-11-27 23:40:49: DONE +k = 63, m = 31, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 29016582 bases, 22816644 kmers +read 200000 sequences, 59512666 bases, 47112728 kmers +read 300000 sequences, 93672597 bases, 75072659 kmers +read 400000 sequences, 116504475 bases, 91704537 kmers +read 500000 sequences, 129640659 bases, 98640721 kmers +read 600000 sequences, 142888268 bases, 105688330 kmers +read 700000 sequences, 156121434 bases, 112721496 kmers +read 800000 sequences, 169296056 bases, 119696118 kmers +read 900000 sequences, 182473552 bases, 126673614 kmers +read 1000000 sequences, 195596530 bases, 133596592 kmers +read 1100000 sequences, 208767758 bases, 140567820 kmers +read 1200000 sequences, 222206136 bases, 147806198 kmers +read 1300000 sequences, 235298907 bases, 154698969 kmers +read 1400000 sequences, 248482551 bases, 161682613 kmers +read 1500000 sequences, 261588927 bases, 168588989 kmers +read 1600000 sequences, 275055123 bases, 175855185 kmers +read 1700000 sequences, 288035774 bases, 182635836 kmers +read 1800000 sequences, 301349714 bases, 189749776 kmers +read 1900000 sequences, 314875277 bases, 197075339 kmers +read 2000000 sequences, 328155322 bases, 204155384 kmers +read 2100000 sequences, 341368561 bases, 211168623 kmers +read 2200000 sequences, 354712477 bases, 218312539 kmers +read 2300000 sequences, 368071956 bases, 225472018 kmers +read 2400000 sequences, 381369058 bases, 232569120 kmers +read 2500000 sequences, 395076617 bases, 240076679 kmers +read 2600000 sequences, 408571137 bases, 247371199 kmers +read 2700000 sequences, 421770863 bases, 254370925 kmers +read 2800000 sequences, 434931009 bases, 261331071 kmers +read 2900000 sequences, 448298966 bases, 268499028 kmers +read 3000000 sequences, 461539188 bases, 275539250 kmers +read 3100000 sequences, 474884720 bases, 282684782 kmers +read 3200000 sequences, 488437755 bases, 290037817 kmers +read 3300000 sequences, 501681431 bases, 297081493 kmers +read 3400000 sequences, 514981599 bases, 304181661 kmers +read 3500000 
sequences, 528308841 bases, 311308903 kmers +read 3600000 sequences, 541636881 bases, 318436943 kmers +read 3700000 sequences, 555162303 bases, 325762365 kmers +read 3800000 sequences, 568696721 bases, 333096783 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283249882127208.minimizers.0.bin'... +read 3900000 sequences, 581917274 bases, 340117336 kmers +read 4000000 sequences, 595230685 bases, 347230747 kmers +read 4100000 sequences, 608987239 bases, 354787301 kmers +read 4200000 sequences, 622285126 bases, 361885188 kmers +read 4300000 sequences, 635647812 bases, 369047874 kmers +read 4400000 sequences, 648992570 bases, 376192632 kmers +read 4500000 sequences, 662513011 bases, 383513073 kmers +read 4600000 sequences, 675930610 bases, 390730672 kmers +read 4700000 sequences, 689630791 bases, 398230853 kmers +read 4800000 sequences, 702998442 bases, 405398504 kmers +read 4900000 sequences, 716508958 bases, 412709020 kmers +read 5000000 sequences, 730213651 bases, 420213713 kmers +read 5100000 sequences, 743782088 bases, 427582150 kmers +read 5200000 sequences, 757644219 bases, 435244281 kmers +read 5300000 sequences, 771125055 bases, 442525117 kmers +read 5400000 sequences, 784690986 bases, 449891048 kmers +read 5500000 sequences, 798294730 bases, 457294792 kmers +read 5600000 sequences, 811868508 bases, 464668570 kmers +read 5700000 sequences, 825311135 bases, 471911197 kmers +read 5800000 sequences, 838965446 bases, 479365508 kmers +read 5900000 sequences, 852651285 bases, 486851347 kmers +read 6000000 sequences, 866286122 bases, 494286184 kmers +read 6100000 sequences, 879791056 bases, 501591118 kmers +read 6200000 sequences, 893568449 bases, 509168511 kmers +read 6300000 sequences, 907221007 bases, 516621069 kmers +read 6400000 sequences, 920850613 bases, 524050675 kmers +read 6500000 sequences, 934451411 bases, 531451473 kmers +read 6600000 sequences, 948058118 bases, 538858180 kmers +read 6700000 sequences, 
961671957 bases, 546272019 kmers +read 6800000 sequences, 975396455 bases, 553796517 kmers +read 6900000 sequences, 989058845 bases, 561258907 kmers +read 7000000 sequences, 1002901429 bases, 568901491 kmers +read 7100000 sequences, 1016671629 bases, 576471691 kmers +read 7200000 sequences, 1030411544 bases, 584011606 kmers +read 7300000 sequences, 1044405065 bases, 591805127 kmers +read 7400000 sequences, 1058405891 bases, 599605953 kmers +read 7500000 sequences, 1072419133 bases, 607419195 kmers +read 7600000 sequences, 1086161468 bases, 614961530 kmers +read 7700000 sequences, 1100358314 bases, 622958376 kmers +read 7800000 sequences, 1114260927 bases, 630660989 kmers +read 7900000 sequences, 1128307710 bases, 638507772 kmers +read 8000000 sequences, 1142393895 bases, 646393957 kmers +read 8100000 sequences, 1156356962 bases, 654157024 kmers +read 8200000 sequences, 1170571834 bases, 662171896 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283249882127208.minimizers.1.bin'... 
+read 8300000 sequences, 1184451092 bases, 669851154 kmers +read 8400000 sequences, 1198299201 bases, 677499263 kmers +read 8500000 sequences, 1212442291 bases, 685442353 kmers +read 8600000 sequences, 1226404988 bases, 693205050 kmers +read 8700000 sequences, 1240702863 bases, 701302925 kmers +read 8800000 sequences, 1254656858 bases, 709056920 kmers +read 8900000 sequences, 1268621255 bases, 716821317 kmers +read 9000000 sequences, 1282878920 bases, 724878982 kmers +read 9100000 sequences, 1297350483 bases, 733150545 kmers +read 9200000 sequences, 1311515942 bases, 741116004 kmers +read 9300000 sequences, 1325689602 bases, 749089664 kmers +read 9400000 sequences, 1339681841 bases, 756881903 kmers +read 9500000 sequences, 1353794253 bases, 764794315 kmers +read 9600000 sequences, 1368052405 bases, 772852467 kmers +read 9700000 sequences, 1382480965 bases, 781081027 kmers +read 9800000 sequences, 1397029040 bases, 789429102 kmers +read 9900000 sequences, 1411261749 bases, 797461811 kmers +read 10000000 sequences, 1425636281 bases, 805636343 kmers +read 10100000 sequences, 1439978068 bases, 813778130 kmers +read 10200000 sequences, 1454565939 bases, 822166001 kmers +read 10300000 sequences, 1469139911 bases, 830539973 kmers +read 10400000 sequences, 1483590146 bases, 838790208 kmers +read 10500000 sequences, 1497966602 bases, 846966664 kmers +read 10600000 sequences, 1512656597 bases, 855456659 kmers +read 10700000 sequences, 1527179829 bases, 863779891 kmers +read 10800000 sequences, 1541782704 bases, 872182766 kmers +read 10900000 sequences, 1556446136 bases, 880646198 kmers +read 11000000 sequences, 1571132909 bases, 889132971 kmers +read 11100000 sequences, 1585816988 bases, 897617050 kmers +read 11200000 sequences, 1600557306 bases, 906157368 kmers +read 11300000 sequences, 1615262515 bases, 914662577 kmers +read 11400000 sequences, 1629999559 bases, 923199621 kmers +read 11500000 sequences, 1644860500 bases, 931860562 kmers +read 11600000 sequences, 1659779470 
bases, 940579532 kmers +read 11700000 sequences, 1674792249 bases, 949392311 kmers +read 11800000 sequences, 1689736650 bases, 958136712 kmers +read 11900000 sequences, 1704892540 bases, 967092602 kmers +read 12000000 sequences, 1720028604 bases, 976028666 kmers +read 12100000 sequences, 1735135339 bases, 984935401 kmers +read 12200000 sequences, 1750296958 bases, 993897020 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283249882127208.minimizers.2.bin'... +read 12300000 sequences, 1765691210 bases, 1003091272 kmers +read 12400000 sequences, 1781168535 bases, 1012368597 kmers +read 12500000 sequences, 1796591631 bases, 1021591693 kmers +read 12600000 sequences, 1811900898 bases, 1030700960 kmers +read 12700000 sequences, 1827876385 bases, 1040476447 kmers +read 12800000 sequences, 1843393661 bases, 1049793723 kmers +read 12900000 sequences, 1858842530 bases, 1059042592 kmers +read 13000000 sequences, 1874571274 bases, 1068571336 kmers +read 13100000 sequences, 1890244064 bases, 1078044126 kmers +read 13200000 sequences, 1905959037 bases, 1087559099 kmers +read 13300000 sequences, 1922228213 bases, 1097628275 kmers +read 13400000 sequences, 1938519944 bases, 1107720006 kmers +read 13500000 sequences, 1954664776 bases, 1117664838 kmers +read 13600000 sequences, 1970696460 bases, 1127496522 kmers +read 13700000 sequences, 1987164724 bases, 1137764786 kmers +read 13800000 sequences, 2003371237 bases, 1147771299 kmers +read 13900000 sequences, 2019649705 bases, 1157849767 kmers +read 14000000 sequences, 2036208623 bases, 1168208685 kmers +read 14100000 sequences, 2052985966 bases, 1178786028 kmers +read 14200000 sequences, 2069712096 bases, 1189312158 kmers +read 14300000 sequences, 2086760464 bases, 1200160526 kmers +read 14400000 sequences, 2103719550 bases, 1210919612 kmers +read 14500000 sequences, 2120768440 bases, 1221768502 kmers +read 14600000 sequences, 2137973451 bases, 1232773513 kmers +read 14700000 
sequences, 2155333447 bases, 1243933509 kmers +read 14800000 sequences, 2173028894 bases, 1255428956 kmers +read 14900000 sequences, 2190761817 bases, 1266961879 kmers +read 15000000 sequences, 2208992504 bases, 1278992566 kmers +read 15100000 sequences, 2226993777 bases, 1290793839 kmers +read 15200000 sequences, 2245240709 bases, 1302840771 kmers +read 15300000 sequences, 2264077653 bases, 1315477715 kmers +read 15400000 sequences, 2282789133 bases, 1327989195 kmers +read 15500000 sequences, 2301835592 bases, 1340835654 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283249882127208.minimizers.3.bin'... +read 15600000 sequences, 2321337067 bases, 1354137129 kmers +read 15700000 sequences, 2341431050 bases, 1368031112 kmers +read 15800000 sequences, 2361547779 bases, 1381947841 kmers +read 15900000 sequences, 2382125643 bases, 1396325705 kmers +read 16000000 sequences, 2403497084 bases, 1411497146 kmers +read 16100000 sequences, 2424934249 bases, 1426734311 kmers +read 16200000 sequences, 2447209283 bases, 1442809345 kmers +read 16300000 sequences, 2470231701 bases, 1459631763 kmers +read 16400000 sequences, 2494280866 bases, 1477480928 kmers +read 16500000 sequences, 2519161479 bases, 1496161541 kmers +read 16600000 sequences, 2545406671 bases, 1516206733 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283249882127208.minimizers.4.bin'... +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +num_super_kmers 132294927 +num_pieces 16636524 (+1.35283 [bits/kmer]) +=== step 1: 'parse_file' 336.352 [sec] (220.573 [ns/kmer]) + == files to merge = 5 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 132294927 +num_minimizers 69798549 +building minimizers MPHF with 64 threads and 256 partitions... 
+=== step 2: 'build_minimizers' 6.74322 [sec] (4.42206 [ns/kmer]) +bits_per_offset = ceil(log2(2556368645)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283593123304899.bucket_pairs.0.bin'... +num_singletons 54695912/69798549 (78.3625%) +=== step 3: 'build_index' 16.3246 [sec] (10.7053 [ns/kmer]) +max_num_super_kmers_in_bucket 106968 +log2_max_num_super_kmers_in_bucket 17 +num_buckets_in_skew_index 96980/69798549 (0.138943%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 82970878 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 28124833 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 14721360 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 6468312 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 3625995 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 2841777 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 106968: 11322400 +num_kmers_in_skew_index 150075555 (9.84164%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 82970878 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 82970878 keys; bits/key = 2.68699 + built positions[0] for 82970878 keys; bits/key = 7 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 28124833 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 28124833 keys; bits/key = 2.7999 + built positions[1] for 28124833 keys; bits/key = 8.00001 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 14721360 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[2] for 14721360 keys; bits/key = 2.94826 + built positions[2] for 14721360 keys; bits/key = 9.00002 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 6468312 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 6468312 keys; bits/key = 3.12586 + built positions[3] for 6468312 keys; bits/key = 10.0001 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 3625995 + building MPHF with 64 threads and 256 partitions... + built mphs[4] for 3625995 keys; bits/key = 3.24643 + built positions[4] for 3625995 keys; bits/key = 11.0001 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 2841777 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 2841777 keys; bits/key = 3.37226 + built positions[5] for 2841777 keys; bits/key = 12.0001 + lower 4096; upper 106968; num_bits_per_pos 17; keys_in_partition.size() 11322400 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 11322400 keys; bits/key = 3.00593 + built positions[6] for 11322400 keys; bits/key = 17 +num_bits_for_skew_index 1690139792(1.10836 [bits/kmer]) +=== step 4: 'build_skew_index' 25.8472 [sec] (16.9501 [ns/kmer]) +=== total_time 385.267 [sec] (252.65 [ns/kmer]) +total index size: 1446316528 [B] -- 1446.32 [MB] +SPACE BREAKDOWN: + minimizers: 0.123857 [bits/kmer] (2.70592 [bits/key]) -- 1.63233% + pieces: 0.113881 [bits/kmer] -- 1.50086% + num_super_kmers_before_bucket: 0.11259 [bits/kmer] -- 1.48385% + offsets: 2.7762 [bits/kmer] -- 36.5881% + strings: 3.35283 [bits/kmer] -- 44.1876% + skew_index: 1.10836 [bits/kmer] -- 14.6073% + weights: 9.65307e-07 [bits/kmer] -- 1.2722e-05% + weight_interval_values: 1.67879e-07 [bits/kmer] + weight_interval_lengths: 6.29548e-07 [bits/kmer] + weight_dictionary: 1.67879e-07 [bits/kmer] + -------------- + total: 7.58771 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 78.3625% +buckets with 2 super_kmers = 12.341% 
+buckets with 3 super_kmers = 3.49507% +buckets with 4 super_kmers = 1.61562% +buckets with 5 super_kmers = 0.906593% +buckets with 6 super_kmers = 0.580495% +buckets with 7 super_kmers = 0.403437% +buckets with 8 super_kmers = 0.297214% +buckets with 9 super_kmers = 0.228013% +buckets with 10 super_kmers = 0.179978% +buckets with 11 super_kmers = 0.146271% +buckets with 12 super_kmers = 0.121415% +buckets with 13 super_kmers = 0.102183% +buckets with 14 super_kmers = 0.0880205% +buckets with 15 super_kmers = 0.0757365% +buckets with 16 super_kmers = 0.0666547% +max_num_super_kmers_in_bucket 106968 +2025-11-27 23:47:15: saving data structure to disk... +2025-11-27 23:47:16: DONE +k = 63, m = 31, seed = 1, l = 6, c = 3, canonical_parsing = true, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 12340428 bases, 6140490 kmers +read 200000 sequences, 24646464 bases, 12246526 kmers +read 300000 sequences, 37018030 bases, 18418092 kmers +read 400000 sequences, 49394989 bases, 24595051 kmers +read 500000 sequences, 61758753 bases, 30758815 kmers +read 600000 sequences, 74141065 bases, 36941127 kmers +read 700000 sequences, 86514469 bases, 43114531 kmers +read 800000 sequences, 98874419 bases, 49274481 kmers +read 900000 sequences, 111243104 bases, 55443166 kmers +read 1000000 sequences, 123617917 bases, 61617979 kmers +read 1100000 sequences, 136042481 bases, 67842543 kmers +read 1200000 sequences, 148420567 bases, 74020629 kmers +read 1300000 sequences, 160880986 bases, 80281048 kmers +read 1400000 sequences, 173296738 bases, 86496800 kmers +read 1500000 sequences, 185720015 bases, 92720077 kmers +read 1600000 sequences, 198143788 bases, 98943850 kmers +read 1700000 sequences, 210604030 bases, 105204092 kmers +read 1800000 sequences, 223058949 bases, 111459011 kmers +read 1900000 sequences, 235529393 bases, 117729455 kmers +read 2000000 sequences, 248006332 bases, 124006394 kmers 
+read 2100000 sequences, 260467827 bases, 130267889 kmers +read 2200000 sequences, 272924289 bases, 136524351 kmers +read 2300000 sequences, 285441228 bases, 142841290 kmers +read 2400000 sequences, 297932568 bases, 149132630 kmers +read 2500000 sequences, 310463939 bases, 155464001 kmers +read 2600000 sequences, 323014521 bases, 161814583 kmers +read 2700000 sequences, 335527696 bases, 168127758 kmers +read 2800000 sequences, 348042722 bases, 174442784 kmers +read 2900000 sequences, 360581821 bases, 180781883 kmers +read 3000000 sequences, 373165681 bases, 187165743 kmers +read 3100000 sequences, 385714108 bases, 193514170 kmers +read 3200000 sequences, 398280992 bases, 199881054 kmers +read 3300000 sequences, 410870862 bases, 206270924 kmers +read 3400000 sequences, 423449691 bases, 212649753 kmers +read 3500000 sequences, 436046745 bases, 219046807 kmers +read 3600000 sequences, 448679809 bases, 225479871 kmers +read 3700000 sequences, 461321374 bases, 231921436 kmers +read 3800000 sequences, 473931319 bases, 238331381 kmers +read 3900000 sequences, 486569329 bases, 244769391 kmers +read 4000000 sequences, 499216681 bases, 251216743 kmers +read 4100000 sequences, 511895744 bases, 257695806 kmers +read 4200000 sequences, 524602447 bases, 264202509 kmers +read 4300000 sequences, 537274929 bases, 270674991 kmers +read 4400000 sequences, 549984249 bases, 277184311 kmers +read 4500000 sequences, 562699936 bases, 283699998 kmers +read 4600000 sequences, 575434830 bases, 290234892 kmers +read 4700000 sequences, 588209883 bases, 296809945 kmers +read 4800000 sequences, 600967786 bases, 303367848 kmers +read 4900000 sequences, 613729111 bases, 309929173 kmers +read 5000000 sequences, 626504015 bases, 316504077 kmers +read 5100000 sequences, 639246673 bases, 323046735 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.0.bin'... 
+read 5200000 sequences, 652025570 bases, 329625632 kmers +read 5300000 sequences, 664852185 bases, 336252247 kmers +read 5400000 sequences, 677687532 bases, 342887594 kmers +read 5500000 sequences, 690503191 bases, 349503253 kmers +read 5600000 sequences, 703329021 bases, 356129083 kmers +read 5700000 sequences, 716216575 bases, 362816637 kmers +read 5800000 sequences, 729051677 bases, 369451739 kmers +read 5900000 sequences, 741931929 bases, 376131991 kmers +read 6000000 sequences, 754844870 bases, 382844932 kmers +read 6100000 sequences, 767765015 bases, 389565077 kmers +read 6200000 sequences, 780677651 bases, 396277713 kmers +read 6300000 sequences, 793620604 bases, 403020666 kmers +read 6400000 sequences, 806637866 bases, 409837928 kmers +read 6500000 sequences, 819598810 bases, 416598872 kmers +read 6600000 sequences, 832595190 bases, 423395252 kmers +read 6700000 sequences, 845581476 bases, 430181538 kmers +read 6800000 sequences, 858575752 bases, 436975814 kmers +read 6900000 sequences, 871622804 bases, 443822866 kmers +read 7000000 sequences, 884611433 bases, 450611495 kmers +read 7100000 sequences, 897672844 bases, 457472906 kmers +read 7200000 sequences, 910732890 bases, 464332952 kmers +read 7300000 sequences, 923812876 bases, 471212938 kmers +read 7400000 sequences, 936909486 bases, 478109548 kmers +read 7500000 sequences, 949997076 bases, 484997138 kmers +read 7600000 sequences, 963107948 bases, 491908010 kmers +read 7700000 sequences, 976263311 bases, 498863373 kmers +read 7800000 sequences, 989391312 bases, 505791374 kmers +read 7900000 sequences, 1002542416 bases, 512742478 kmers +read 8000000 sequences, 1015719792 bases, 519719854 kmers +read 8100000 sequences, 1028930102 bases, 526730164 kmers +read 8200000 sequences, 1042133340 bases, 533733402 kmers +read 8300000 sequences, 1055343002 bases, 540743064 kmers +read 8400000 sequences, 1068571302 bases, 547771364 kmers +read 8500000 sequences, 1081782071 bases, 554782133 kmers +read 8600000 
sequences, 1095081331 bases, 561881393 kmers +read 8700000 sequences, 1108381691 bases, 568981753 kmers +read 8800000 sequences, 1121704459 bases, 576104521 kmers +read 8900000 sequences, 1135025716 bases, 583225778 kmers +read 9000000 sequences, 1148384003 bases, 590384065 kmers +read 9100000 sequences, 1161802419 bases, 597602481 kmers +read 9200000 sequences, 1175228269 bases, 604828331 kmers +read 9300000 sequences, 1188645635 bases, 612045697 kmers +read 9400000 sequences, 1202107172 bases, 619307234 kmers +read 9500000 sequences, 1215616855 bases, 626616917 kmers +read 9600000 sequences, 1229082244 bases, 633882306 kmers +read 9700000 sequences, 1242623466 bases, 641223528 kmers +read 9800000 sequences, 1256182360 bases, 648582422 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.1.bin'... +read 9900000 sequences, 1269727590 bases, 655927652 kmers +read 10000000 sequences, 1283305118 bases, 663305180 kmers +read 10100000 sequences, 1296926048 bases, 670726110 kmers +read 10200000 sequences, 1310556982 bases, 678157044 kmers +read 10300000 sequences, 1324233550 bases, 685633612 kmers +read 10400000 sequences, 1337906859 bases, 693106921 kmers +read 10500000 sequences, 1351643168 bases, 700643230 kmers +read 10600000 sequences, 1365330951 bases, 708131013 kmers +read 10700000 sequences, 1379108463 bases, 715708525 kmers +read 10800000 sequences, 1392877680 bases, 723277742 kmers +read 10900000 sequences, 1406644001 bases, 730844063 kmers +read 11000000 sequences, 1420392493 bases, 738392555 kmers +read 11100000 sequences, 1434208255 bases, 746008317 kmers +read 11200000 sequences, 1448044904 bases, 753644966 kmers +read 11300000 sequences, 1461973739 bases, 761373801 kmers +read 11400000 sequences, 1475908384 bases, 769108446 kmers +read 11500000 sequences, 1489828655 bases, 776828717 kmers +read 11600000 sequences, 1503804136 bases, 784604198 kmers +read 11700000 sequences, 1517825110 
bases, 792425172 kmers +read 11800000 sequences, 1531854849 bases, 800254911 kmers +read 11900000 sequences, 1545912444 bases, 808112506 kmers +read 12000000 sequences, 1559904915 bases, 815904977 kmers +read 12100000 sequences, 1574126558 bases, 823926620 kmers +read 12200000 sequences, 1588253922 bases, 831853984 kmers +read 12300000 sequences, 1602371618 bases, 839771680 kmers +read 12400000 sequences, 1616537157 bases, 847737219 kmers +read 12500000 sequences, 1630788602 bases, 855788664 kmers +read 12600000 sequences, 1644978314 bases, 863778376 kmers +read 12700000 sequences, 1659241146 bases, 871841208 kmers +read 12800000 sequences, 1673584118 bases, 879984180 kmers +read 12900000 sequences, 1687940515 bases, 888140577 kmers +read 13000000 sequences, 1702343713 bases, 896343775 kmers +read 13100000 sequences, 1716759504 bases, 904559566 kmers +read 13200000 sequences, 1731228620 bases, 912828682 kmers +read 13300000 sequences, 1745703567 bases, 921103629 kmers +read 13400000 sequences, 1760203118 bases, 929403180 kmers +read 13500000 sequences, 1774792602 bases, 937792664 kmers +read 13600000 sequences, 1789438596 bases, 946238658 kmers +read 13700000 sequences, 1804059588 bases, 954659650 kmers +read 13800000 sequences, 1818743867 bases, 963143929 kmers +read 13900000 sequences, 1833395553 bases, 971595615 kmers +read 14000000 sequences, 1848153717 bases, 980153779 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.2.bin'... 
+read 14100000 sequences, 1862998527 bases, 988798589 kmers +read 14200000 sequences, 1877911963 bases, 997512025 kmers +read 14300000 sequences, 1892816751 bases, 1006216813 kmers +read 14400000 sequences, 1907719813 bases, 1014919875 kmers +read 14500000 sequences, 1922766528 bases, 1023766590 kmers +read 14600000 sequences, 1937831250 bases, 1032631312 kmers +read 14700000 sequences, 1952937508 bases, 1041537570 kmers +read 14800000 sequences, 1968048267 bases, 1050448329 kmers +read 14900000 sequences, 1983251368 bases, 1059451430 kmers +read 15000000 sequences, 1998487486 bases, 1068487548 kmers +read 15100000 sequences, 2013788234 bases, 1077588296 kmers +read 15200000 sequences, 2029113702 bases, 1086713764 kmers +read 15300000 sequences, 2044464630 bases, 1095864692 kmers +read 15400000 sequences, 2059908322 bases, 1105108384 kmers +read 15500000 sequences, 2075402983 bases, 1114403045 kmers +read 15600000 sequences, 2090966187 bases, 1123766249 kmers +read 15700000 sequences, 2106543558 bases, 1133143620 kmers +read 15800000 sequences, 2122217022 bases, 1142617084 kmers +read 15900000 sequences, 2137840083 bases, 1152040145 kmers +read 16000000 sequences, 2153589344 bases, 1161589406 kmers +read 16100000 sequences, 2169260628 bases, 1171060690 kmers +read 16200000 sequences, 2185176930 bases, 1180776992 kmers +read 16300000 sequences, 2201140458 bases, 1190540520 kmers +read 16400000 sequences, 2217102473 bases, 1200302535 kmers +read 16500000 sequences, 2233154507 bases, 1210154569 kmers +read 16600000 sequences, 2249289162 bases, 1220089224 kmers +read 16700000 sequences, 2265554961 bases, 1230155023 kmers +read 16800000 sequences, 2281740014 bases, 1240140076 kmers +read 16900000 sequences, 2298107838 bases, 1250307900 kmers +read 17000000 sequences, 2314472095 bases, 1260472157 kmers +read 17100000 sequences, 2331031176 bases, 1270831238 kmers +read 17200000 sequences, 2347660266 bases, 1281260328 kmers +read 17300000 sequences, 2364281388 bases, 
1291681450 kmers +read 17400000 sequences, 2381150963 bases, 1302351025 kmers +read 17500000 sequences, 2398014863 bases, 1313014925 kmers +read 17600000 sequences, 2414848399 bases, 1323648461 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.3.bin'... +read 17700000 sequences, 2431827698 bases, 1334427760 kmers +read 17800000 sequences, 2448880349 bases, 1345280411 kmers +read 17900000 sequences, 2466079400 bases, 1356279462 kmers +read 18000000 sequences, 2483330959 bases, 1367331021 kmers +read 18100000 sequences, 2500735710 bases, 1378535772 kmers +read 18200000 sequences, 2518182276 bases, 1389782338 kmers +read 18300000 sequences, 2535776920 bases, 1401176982 kmers +read 18400000 sequences, 2553420194 bases, 1412620256 kmers +read 18500000 sequences, 2571217426 bases, 1424217488 kmers +read 18600000 sequences, 2589036466 bases, 1435836528 kmers +read 18700000 sequences, 2607070465 bases, 1447670527 kmers +read 18800000 sequences, 2625276965 bases, 1459677027 kmers +read 18900000 sequences, 2643374208 bases, 1471574270 kmers +read 19000000 sequences, 2661730118 bases, 1483730180 kmers +read 19100000 sequences, 2680149690 bases, 1495949752 kmers +read 19200000 sequences, 2698694575 bases, 1508294637 kmers +read 19300000 sequences, 2717352858 bases, 1520752920 kmers +read 19400000 sequences, 2736256794 bases, 1533456856 kmers +read 19500000 sequences, 2755257099 bases, 1546257161 kmers +read 19600000 sequences, 2774351092 bases, 1559151154 kmers +read 19700000 sequences, 2793728147 bases, 1572328209 kmers +read 19800000 sequences, 2813220812 bases, 1585620874 kmers +read 19900000 sequences, 2832722318 bases, 1598922380 kmers +read 20000000 sequences, 2852409685 bases, 1612409747 kmers +read 20100000 sequences, 2872347441 bases, 1626147503 kmers +read 20200000 sequences, 2892389463 bases, 1639989525 kmers +read 20300000 sequences, 2912735330 bases, 1654135392 kmers +read 20400000 
sequences, 2933385055 bases, 1668585117 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.4.bin'... +read 20500000 sequences, 2954091410 bases, 1683091472 kmers +read 20600000 sequences, 2974925213 bases, 1697725275 kmers +read 20700000 sequences, 2995915692 bases, 1712515754 kmers +read 20800000 sequences, 3017122624 bases, 1727522686 kmers +read 20900000 sequences, 3038531652 bases, 1742731714 kmers +read 21000000 sequences, 3060194460 bases, 1758194522 kmers +read 21100000 sequences, 3081907028 bases, 1773707090 kmers +read 21200000 sequences, 3104086626 bases, 1789686688 kmers +read 21300000 sequences, 3126473195 bases, 1805873257 kmers +read 21400000 sequences, 3149107906 bases, 1822307968 kmers +read 21500000 sequences, 3171928079 bases, 1838928141 kmers +read 21600000 sequences, 3194957971 bases, 1855758033 kmers +read 21700000 sequences, 3218340993 bases, 1872941055 kmers +read 21800000 sequences, 3241998343 bases, 1890398405 kmers +read 21900000 sequences, 3265867458 bases, 1908067520 kmers +read 22000000 sequences, 3290140116 bases, 1926140178 kmers +read 22100000 sequences, 3314773229 bases, 1944573291 kmers +read 22200000 sequences, 3339683134 bases, 1963283196 kmers +read 22300000 sequences, 3365026133 bases, 1982426195 kmers +read 22400000 sequences, 3390524791 bases, 2001724853 kmers +read 22500000 sequences, 3416602833 bases, 2021602895 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.5.bin'... 
+read 22600000 sequences, 3443068690 bases, 2041868752 kmers +read 22700000 sequences, 3469843700 bases, 2062443762 kmers +read 22800000 sequences, 3496984478 bases, 2083384540 kmers +read 22900000 sequences, 3524534052 bases, 2104734114 kmers +read 23000000 sequences, 3552570866 bases, 2126570928 kmers +read 23100000 sequences, 3581192297 bases, 2148992359 kmers +read 23200000 sequences, 3610381722 bases, 2171981784 kmers +read 23300000 sequences, 3639850904 bases, 2195250966 kmers +read 23400000 sequences, 3669893515 bases, 2219093577 kmers +read 23500000 sequences, 3700753180 bases, 2243753242 kmers +read 23600000 sequences, 3732004041 bases, 2268804103 kmers +read 23700000 sequences, 3763988485 bases, 2294588547 kmers +read 23800000 sequences, 3796342436 bases, 2320742498 kmers +read 23900000 sequences, 3829616775 bases, 2347816837 kmers +read 24000000 sequences, 3863888823 bases, 2375888885 kmers +read 24100000 sequences, 3898563459 bases, 2404363521 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.6.bin'... +read 24200000 sequences, 3934104488 bases, 2433704550 kmers +read 24300000 sequences, 3970663407 bases, 2464063469 kmers +read 24400000 sequences, 4008289022 bases, 2495489084 kmers +read 24500000 sequences, 4046653033 bases, 2527653095 kmers +read 24600000 sequences, 4085473878 bases, 2560273940 kmers +read 24700000 sequences, 4125325829 bases, 2593925891 kmers +read 24800000 sequences, 4166644355 bases, 2629044417 kmers +read 24900000 sequences, 4209155738 bases, 2665355800 kmers +read 25000000 sequences, 4253357779 bases, 2703357841 kmers +read 25100000 sequences, 4298305920 bases, 2742105982 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.7.bin'... 
+read 25200000 sequences, 4345040766 bases, 2782640828 kmers +read 25300000 sequences, 4392930039 bases, 2824330101 kmers +read 25400000 sequences, 4442653000 bases, 2867853062 kmers +read 25500000 sequences, 4494210063 bases, 2913210125 kmers +read 25600000 sequences, 4547341577 bases, 2960141639 kmers +read 25700000 sequences, 4602288626 bases, 3008888688 kmers +read 25800000 sequences, 4659267065 bases, 3059667127 kmers +read 25900000 sequences, 4718698709 bases, 3112898771 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.8.bin'... +read 26000000 sequences, 4780487522 bases, 3168487584 kmers +read 26100000 sequences, 4845830481 bases, 3227630543 kmers +read 26200000 sequences, 4913892030 bases, 3289492092 kmers +read 26300000 sequences, 4985279649 bases, 3354679711 kmers +read 26400000 sequences, 5059257799 bases, 3422457861 kmers +read 26500000 sequences, 5137444349 bases, 3494444411 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.9.bin'... +read 26600000 sequences, 5220217341 bases, 3571017403 kmers +read 26700000 sequences, 5307644757 bases, 3652244819 kmers +read 26800000 sequences, 5400268538 bases, 3738668600 kmers +read 26900000 sequences, 5498820865 bases, 3831020927 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.10.bin'... 
+read 27000000 sequences, 5604483878 bases, 3930483940 kmers +read 27100000 sequences, 5717045771 bases, 4036845833 kmers +read 27200000 sequences, 5833610977 bases, 4147211039 kmers +read 27300000 sequences, 5846284831 bases, 4153684893 kmers +read 27400000 sequences, 5857666136 bases, 4158866198 kmers +read 27500000 sequences, 5869033199 bases, 4164033261 kmers +read 27600000 sequences, 5880411993 bases, 4169212055 kmers +read 27700000 sequences, 5891780011 bases, 4174380073 kmers +read 27800000 sequences, 5903175513 bases, 4179575575 kmers +read 27900000 sequences, 5914554374 bases, 4184754436 kmers +read 28000000 sequences, 5925952810 bases, 4189952872 kmers +read 28100000 sequences, 5937329908 bases, 4195129970 kmers +read 28200000 sequences, 5948710629 bases, 4200310691 kmers +read 28300000 sequences, 5960084042 bases, 4205484104 kmers +read 28400000 sequences, 5971480602 bases, 4210680664 kmers +read 28500000 sequences, 5982848841 bases, 4215848903 kmers +read 28600000 sequences, 5994224296 bases, 4221024358 kmers +read 28700000 sequences, 6005620451 bases, 4226220513 kmers +read 28800000 sequences, 6016990442 bases, 4231390504 kmers +read 28900000 sequences, 6028385648 bases, 4236585710 kmers +read 29000000 sequences, 6039783825 bases, 4241783887 kmers +read 29100000 sequences, 6051178683 bases, 4246978745 kmers +read 29200000 sequences, 6062557847 bases, 4252157909 kmers +read 29300000 sequences, 6073940813 bases, 4257340875 kmers +read 29400000 sequences, 6085331250 bases, 4262531312 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.11.bin'... 
+read 29500000 sequences, 6096728346 bases, 4267728408 kmers +read 29600000 sequences, 6108102978 bases, 4272903040 kmers +read 29700000 sequences, 6119481358 bases, 4278081420 kmers +read 29800000 sequences, 6130856114 bases, 4283256176 kmers +read 29900000 sequences, 6142238297 bases, 4288438359 kmers +read 30000000 sequences, 6153634829 bases, 4293634891 kmers +read 30100000 sequences, 6165020049 bases, 4298820111 kmers +read 30200000 sequences, 6176433423 bases, 4304033485 kmers +read 30300000 sequences, 6187857491 bases, 4309257553 kmers +read 30400000 sequences, 6199252194 bases, 4314452256 kmers +read 30500000 sequences, 6210672548 bases, 4319672610 kmers +read 30600000 sequences, 6222091861 bases, 4324891923 kmers +read 30700000 sequences, 6233490435 bases, 4330090497 kmers +read 30800000 sequences, 6244878831 bases, 4335278893 kmers +read 30900000 sequences, 6256282376 bases, 4340482438 kmers +read 31000000 sequences, 6267683956 bases, 4345684018 kmers +read 31100000 sequences, 6279097813 bases, 4350897875 kmers +read 31200000 sequences, 6290508351 bases, 4356108413 kmers +read 31300000 sequences, 6301930261 bases, 4361330323 kmers +read 31400000 sequences, 6313323207 bases, 4366523269 kmers +read 31500000 sequences, 6324738619 bases, 4371738681 kmers +read 31600000 sequences, 6336131574 bases, 4376931636 kmers +read 31700000 sequences, 6347524494 bases, 4382124556 kmers +read 31800000 sequences, 6358957002 bases, 4387357064 kmers +read 31900000 sequences, 6370374986 bases, 4392575048 kmers +read 32000000 sequences, 6381788111 bases, 4397788173 kmers +read 32100000 sequences, 6393224920 bases, 4403024982 kmers +read 32200000 sequences, 6404645946 bases, 4408246008 kmers +read 32300000 sequences, 6416049834 bases, 4413449896 kmers +read 32400000 sequences, 6427468467 bases, 4418668529 kmers +read 32500000 sequences, 6438906136 bases, 4423906198 kmers +read 32600000 sequences, 6450334707 bases, 4429134769 kmers +read 32700000 sequences, 6461755008 bases, 
4434355070 kmers +read 32800000 sequences, 6473195587 bases, 4439595649 kmers +read 32900000 sequences, 6484644167 bases, 4444844229 kmers +read 33000000 sequences, 6496092416 bases, 4450092478 kmers +read 33100000 sequences, 6507511441 bases, 4455311503 kmers +read 33200000 sequences, 6518945085 bases, 4460545147 kmers +read 33300000 sequences, 6530384425 bases, 4465784487 kmers +read 33400000 sequences, 6541805219 bases, 4471005281 kmers +read 33500000 sequences, 6553237504 bases, 4476237566 kmers +read 33600000 sequences, 6564689101 bases, 4481489163 kmers +read 33700000 sequences, 6576122082 bases, 4486722144 kmers +read 33800000 sequences, 6587572222 bases, 4491972284 kmers +read 33900000 sequences, 6599018743 bases, 4497218805 kmers +read 34000000 sequences, 6610456706 bases, 4502456768 kmers +read 34100000 sequences, 6621897287 bases, 4507697349 kmers +read 34200000 sequences, 6633348561 bases, 4512948623 kmers +read 34300000 sequences, 6644796892 bases, 4518196954 kmers +read 34400000 sequences, 6656241823 bases, 4523441885 kmers +read 34500000 sequences, 6667691043 bases, 4528691105 kmers +read 34600000 sequences, 6679157213 bases, 4533957275 kmers +read 34700000 sequences, 6690642188 bases, 4539242250 kmers +read 34800000 sequences, 6702109473 bases, 4544509535 kmers +read 34900000 sequences, 6713569359 bases, 4549769421 kmers +read 35000000 sequences, 6725025515 bases, 4555025577 kmers +read 35100000 sequences, 6736483861 bases, 4560283923 kmers +read 35200000 sequences, 6747950533 bases, 4565550595 kmers +read 35300000 sequences, 6759386980 bases, 4570787042 kmers +read 35400000 sequences, 6770873467 bases, 4576073529 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.12.bin'... 
+read 35500000 sequences, 6782328796 bases, 4581328858 kmers +read 35600000 sequences, 6793801566 bases, 4586601628 kmers +read 35700000 sequences, 6805288962 bases, 4591889024 kmers +read 35800000 sequences, 6816766599 bases, 4597166661 kmers +read 35900000 sequences, 6828221603 bases, 4602421665 kmers +read 36000000 sequences, 6839697263 bases, 4607697325 kmers +read 36100000 sequences, 6851167968 bases, 4612968030 kmers +read 36200000 sequences, 6862661069 bases, 4618261131 kmers +read 36300000 sequences, 6874122160 bases, 4623522222 kmers +read 36400000 sequences, 6885601876 bases, 4628801938 kmers +read 36500000 sequences, 6897090990 bases, 4634091052 kmers +read 36600000 sequences, 6908577092 bases, 4639377154 kmers +read 36700000 sequences, 6920070395 bases, 4644670457 kmers +read 36800000 sequences, 6931582953 bases, 4649983015 kmers +read 36900000 sequences, 6943074936 bases, 4655274998 kmers +read 37000000 sequences, 6954565924 bases, 4660565986 kmers +read 37100000 sequences, 6966070452 bases, 4665870514 kmers +read 37200000 sequences, 6977560415 bases, 4671160477 kmers +read 37300000 sequences, 6989066925 bases, 4676466987 kmers +read 37400000 sequences, 7000566400 bases, 4681766462 kmers +read 37500000 sequences, 7012064100 bases, 4687064162 kmers +read 37600000 sequences, 7023578971 bases, 4692379033 kmers +read 37700000 sequences, 7035113747 bases, 4697713809 kmers +read 37800000 sequences, 7046611117 bases, 4703011179 kmers +read 37900000 sequences, 7058112871 bases, 4708312933 kmers +read 38000000 sequences, 7069620689 bases, 4713620751 kmers +read 38100000 sequences, 7081124602 bases, 4718924664 kmers +read 38200000 sequences, 7092641574 bases, 4724241636 kmers +read 38300000 sequences, 7104173321 bases, 4729573383 kmers +read 38400000 sequences, 7115693749 bases, 4734893811 kmers +read 38500000 sequences, 7127223394 bases, 4740223456 kmers +read 38600000 sequences, 7138742801 bases, 4745542863 kmers +read 38700000 sequences, 7150287920 bases, 
4750887982 kmers +read 38800000 sequences, 7161816661 bases, 4756216723 kmers +read 38900000 sequences, 7173324334 bases, 4761524396 kmers +read 39000000 sequences, 7184856265 bases, 4766856327 kmers +read 39100000 sequences, 7196427309 bases, 4772227371 kmers +read 39200000 sequences, 7207963095 bases, 4777563157 kmers +read 39300000 sequences, 7219512038 bases, 4782912100 kmers +read 39400000 sequences, 7231058926 bases, 4788258988 kmers +read 39500000 sequences, 7242615745 bases, 4793615807 kmers +read 39600000 sequences, 7254151997 bases, 4798952059 kmers +read 39700000 sequences, 7265709248 bases, 4804309310 kmers +read 39800000 sequences, 7277261549 bases, 4809661611 kmers +read 39900000 sequences, 7288795953 bases, 4814996015 kmers +read 40000000 sequences, 7300352386 bases, 4820352448 kmers +read 40100000 sequences, 7311909203 bases, 4825709265 kmers +read 40200000 sequences, 7323455080 bases, 4831055142 kmers +read 40300000 sequences, 7335021048 bases, 4836421110 kmers +read 40400000 sequences, 7346572779 bases, 4841772841 kmers +read 40500000 sequences, 7358141861 bases, 4847141923 kmers +read 40600000 sequences, 7369700966 bases, 4852501028 kmers +read 40700000 sequences, 7381268484 bases, 4857868546 kmers +read 40800000 sequences, 7392842850 bases, 4863242912 kmers +read 40900000 sequences, 7404426052 bases, 4868626114 kmers +read 41000000 sequences, 7415987087 bases, 4873987149 kmers +read 41100000 sequences, 7427574902 bases, 4879374964 kmers +read 41200000 sequences, 7439175505 bases, 4884775567 kmers +read 41300000 sequences, 7450739513 bases, 4890139575 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.13.bin'... 
+read 41400000 sequences, 7462313943 bases, 4895514005 kmers +read 41500000 sequences, 7473895566 bases, 4900895628 kmers +read 41600000 sequences, 7485498872 bases, 4906298934 kmers +read 41700000 sequences, 7497097591 bases, 4911697653 kmers +read 41800000 sequences, 7508699276 bases, 4917099338 kmers +read 41900000 sequences, 7520283386 bases, 4922483448 kmers +read 42000000 sequences, 7531875484 bases, 4927875546 kmers +read 42100000 sequences, 7543485183 bases, 4933285245 kmers +read 42200000 sequences, 7555088346 bases, 4938688408 kmers +read 42300000 sequences, 7566689709 bases, 4944089771 kmers +read 42400000 sequences, 7578300740 bases, 4949500802 kmers +read 42500000 sequences, 7589916966 bases, 4954917028 kmers +read 42600000 sequences, 7601521236 bases, 4960321298 kmers +read 42700000 sequences, 7613130580 bases, 4965730642 kmers +read 42800000 sequences, 7624734645 bases, 4971134707 kmers +read 42900000 sequences, 7636365632 bases, 4976565694 kmers +read 43000000 sequences, 7647987121 bases, 4981987183 kmers +read 43100000 sequences, 7659607327 bases, 4987407389 kmers +read 43200000 sequences, 7671221477 bases, 4992821539 kmers +read 43300000 sequences, 7682851013 bases, 4998251075 kmers +read 43400000 sequences, 7694478022 bases, 5003678084 kmers +read 43500000 sequences, 7706091680 bases, 5009091742 kmers +read 43600000 sequences, 7717739091 bases, 5014539153 kmers +read 43700000 sequences, 7729379970 bases, 5019980032 kmers +read 43800000 sequences, 7741005738 bases, 5025405800 kmers +read 43900000 sequences, 7752654757 bases, 5030854819 kmers +read 44000000 sequences, 7764325440 bases, 5036325502 kmers +read 44100000 sequences, 7775977254 bases, 5041777316 kmers +read 44200000 sequences, 7787629671 bases, 5047229733 kmers +read 44300000 sequences, 7799282350 bases, 5052682412 kmers +read 44400000 sequences, 7810951187 bases, 5058151249 kmers +read 44500000 sequences, 7822598421 bases, 5063598483 kmers +read 44600000 sequences, 7834244714 bases, 
5069044776 kmers +read 44700000 sequences, 7845891240 bases, 5074491302 kmers +read 44800000 sequences, 7857565519 bases, 5079965581 kmers +read 44900000 sequences, 7869252351 bases, 5085452413 kmers +read 45000000 sequences, 7880919098 bases, 5090919160 kmers +read 45100000 sequences, 7892576949 bases, 5096377011 kmers +read 45200000 sequences, 7904241929 bases, 5101841991 kmers +read 45300000 sequences, 7915920658 bases, 5107320720 kmers +read 45400000 sequences, 7927605513 bases, 5112805575 kmers +read 45500000 sequences, 7939292816 bases, 5118292878 kmers +read 45600000 sequences, 7950975991 bases, 5123776053 kmers +read 45700000 sequences, 7962666759 bases, 5129266821 kmers +read 45800000 sequences, 7974341946 bases, 5134742008 kmers +read 45900000 sequences, 7986038612 bases, 5140238674 kmers +read 46000000 sequences, 7997748818 bases, 5145748880 kmers +read 46100000 sequences, 8009477725 bases, 5151277787 kmers +read 46200000 sequences, 8021166242 bases, 5156766304 kmers +read 46300000 sequences, 8032854170 bases, 5162254232 kmers +read 46400000 sequences, 8044555955 bases, 5167756017 kmers +read 46500000 sequences, 8056240494 bases, 5173240556 kmers +read 46600000 sequences, 8067931653 bases, 5178731715 kmers +read 46700000 sequences, 8079648908 bases, 5184248970 kmers +read 46800000 sequences, 8091384712 bases, 5189784774 kmers +read 46900000 sequences, 8103124079 bases, 5195324141 kmers +read 47000000 sequences, 8114852096 bases, 5200852158 kmers +read 47100000 sequences, 8126577432 bases, 5206377494 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.14.bin'... 
+read 47200000 sequences, 8138321107 bases, 5211921169 kmers +read 47300000 sequences, 8150082884 bases, 5217482946 kmers +read 47400000 sequences, 8161823368 bases, 5223023430 kmers +read 47500000 sequences, 8173558332 bases, 5228558394 kmers +read 47600000 sequences, 8185297180 bases, 5234097242 kmers +read 47700000 sequences, 8197020764 bases, 5239620826 kmers +read 47800000 sequences, 8208783838 bases, 5245183900 kmers +read 47900000 sequences, 8220547786 bases, 5250747848 kmers +read 48000000 sequences, 8232292674 bases, 5256292736 kmers +read 48100000 sequences, 8244054434 bases, 5261854496 kmers +read 48200000 sequences, 8255808686 bases, 5267408748 kmers +read 48300000 sequences, 8267574482 bases, 5272974544 kmers +read 48400000 sequences, 8279340872 bases, 5278540934 kmers +read 48500000 sequences, 8291110333 bases, 5284110395 kmers +read 48600000 sequences, 8302889275 bases, 5289689337 kmers +read 48700000 sequences, 8314648922 bases, 5295248984 kmers +read 48800000 sequences, 8326414162 bases, 5300814224 kmers +read 48900000 sequences, 8338207337 bases, 5306407399 kmers +read 49000000 sequences, 8349993258 bases, 5311993320 kmers +read 49100000 sequences, 8361791349 bases, 5317591411 kmers +read 49200000 sequences, 8373582804 bases, 5323182866 kmers +read 49300000 sequences, 8385394571 bases, 5328794633 kmers +read 49400000 sequences, 8397202995 bases, 5334403057 kmers +read 49500000 sequences, 8409020292 bases, 5340020354 kmers +read 49600000 sequences, 8420828815 bases, 5345628877 kmers +read 49700000 sequences, 8432643281 bases, 5351243343 kmers +read 49800000 sequences, 8444451677 bases, 5356851739 kmers +read 49900000 sequences, 8456260166 bases, 5362460228 kmers +read 50000000 sequences, 8468086036 bases, 5368086098 kmers +read 50100000 sequences, 8479897293 bases, 5373697355 kmers +read 50200000 sequences, 8491729262 bases, 5379329324 kmers +read 50300000 sequences, 8503563407 bases, 5384963469 kmers +read 50400000 sequences, 8515408110 bases, 
5390608172 kmers +read 50500000 sequences, 8527226877 bases, 5396226939 kmers +read 50600000 sequences, 8539067669 bases, 5401867731 kmers +read 50700000 sequences, 8550899987 bases, 5407500049 kmers +read 50800000 sequences, 8562770507 bases, 5413170569 kmers +read 50900000 sequences, 8574595545 bases, 5418795607 kmers +read 51000000 sequences, 8586456412 bases, 5424456474 kmers +read 51100000 sequences, 8598316076 bases, 5430116138 kmers +read 51200000 sequences, 8610172262 bases, 5435772324 kmers +read 51300000 sequences, 8622055720 bases, 5441455782 kmers +read 51400000 sequences, 8633927263 bases, 5447127325 kmers +read 51500000 sequences, 8645823524 bases, 5452823586 kmers +read 51600000 sequences, 8657691355 bases, 5458491417 kmers +read 51700000 sequences, 8669585563 bases, 5464185625 kmers +read 51800000 sequences, 8681474258 bases, 5469874320 kmers +read 51900000 sequences, 8693381907 bases, 5475581969 kmers +read 52000000 sequences, 8705279756 bases, 5481279818 kmers +read 52100000 sequences, 8717209451 bases, 5487009513 kmers +read 52200000 sequences, 8729118869 bases, 5492718931 kmers +read 52300000 sequences, 8741023453 bases, 5498423515 kmers +read 52400000 sequences, 8752969676 bases, 5504169738 kmers +read 52500000 sequences, 8764903676 bases, 5509903738 kmers +read 52600000 sequences, 8776822814 bases, 5515622876 kmers +read 52700000 sequences, 8788751015 bases, 5521351077 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.15.bin'... 
+read 52800000 sequences, 8800669725 bases, 5527069787 kmers +read 52900000 sequences, 8812598349 bases, 5532798411 kmers +read 53000000 sequences, 8824571572 bases, 5538571634 kmers +read 53100000 sequences, 8836518883 bases, 5544318945 kmers +read 53200000 sequences, 8848466396 bases, 5550066458 kmers +read 53300000 sequences, 8860413169 bases, 5555813231 kmers +read 53400000 sequences, 8872384224 bases, 5561584286 kmers +read 53500000 sequences, 8884332155 bases, 5567332217 kmers +read 53600000 sequences, 8896284511 bases, 5573084573 kmers +read 53700000 sequences, 8908270586 bases, 5578870648 kmers +read 53800000 sequences, 8920267982 bases, 5584668044 kmers +read 53900000 sequences, 8932245973 bases, 5590446035 kmers +read 54000000 sequences, 8944259742 bases, 5596259804 kmers +read 54100000 sequences, 8956269308 bases, 5602069370 kmers +read 54200000 sequences, 8968276811 bases, 5607876873 kmers +read 54300000 sequences, 8980258845 bases, 5613658907 kmers +read 54400000 sequences, 8992247801 bases, 5619447863 kmers +read 54500000 sequences, 9004254133 bases, 5625254195 kmers +read 54600000 sequences, 9016278273 bases, 5631078335 kmers +read 54700000 sequences, 9028312029 bases, 5636912091 kmers +read 54800000 sequences, 9040331856 bases, 5642731918 kmers +read 54900000 sequences, 9052350704 bases, 5648550766 kmers +read 55000000 sequences, 9064361175 bases, 5654361237 kmers +read 55100000 sequences, 9076390694 bases, 5660190756 kmers +read 55200000 sequences, 9088446082 bases, 5666046144 kmers +read 55300000 sequences, 9100525786 bases, 5671925848 kmers +read 55400000 sequences, 9112587186 bases, 5677787248 kmers +read 55500000 sequences, 9124623681 bases, 5683623743 kmers +read 55600000 sequences, 9136715382 bases, 5689515444 kmers +read 55700000 sequences, 9148800729 bases, 5695400791 kmers +read 55800000 sequences, 9160843647 bases, 5701243709 kmers +read 55900000 sequences, 9172925217 bases, 5707125279 kmers +read 56000000 sequences, 9185024099 bases, 
5713024161 kmers +read 56100000 sequences, 9197117041 bases, 5718917103 kmers +read 56200000 sequences, 9209230518 bases, 5724830580 kmers +read 56300000 sequences, 9221344370 bases, 5730744432 kmers +read 56400000 sequences, 9233465281 bases, 5736665343 kmers +read 56500000 sequences, 9245571873 bases, 5742571935 kmers +read 56600000 sequences, 9257641594 bases, 5748441656 kmers +read 56700000 sequences, 9269746121 bases, 5754346183 kmers +read 56800000 sequences, 9281862419 bases, 5760262481 kmers +read 56900000 sequences, 9294025235 bases, 5766225297 kmers +read 57000000 sequences, 9306137861 bases, 5772137923 kmers +read 57100000 sequences, 9318287515 bases, 5778087577 kmers +read 57200000 sequences, 9330445537 bases, 5784045599 kmers +read 57300000 sequences, 9342614929 bases, 5790014991 kmers +read 57400000 sequences, 9354790599 bases, 5795990661 kmers +read 57500000 sequences, 9366968184 bases, 5801968246 kmers +read 57600000 sequences, 9379136016 bases, 5807936078 kmers +read 57700000 sequences, 9391324284 bases, 5813924346 kmers +read 57800000 sequences, 9403502226 bases, 5819902288 kmers +read 57900000 sequences, 9415684578 bases, 5825884640 kmers +read 58000000 sequences, 9427875846 bases, 5831875908 kmers +read 58100000 sequences, 9440085310 bases, 5837885372 kmers +read 58200000 sequences, 9452280471 bases, 5843880533 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.16.bin'... 
+read 58300000 sequences, 9464471817 bases, 5849871879 kmers +read 58400000 sequences, 9476715907 bases, 5855915969 kmers +read 58500000 sequences, 9488919933 bases, 5861919995 kmers +read 58600000 sequences, 9501146085 bases, 5867946147 kmers +read 58700000 sequences, 9513406483 bases, 5874006545 kmers +read 58800000 sequences, 9525653213 bases, 5880053275 kmers +read 58900000 sequences, 9537897953 bases, 5886098015 kmers +read 59000000 sequences, 9550182056 bases, 5892182118 kmers +read 59100000 sequences, 9562418655 bases, 5898218717 kmers +read 59200000 sequences, 9574664926 bases, 5904264988 kmers +read 59300000 sequences, 9586958590 bases, 5910358652 kmers +read 59400000 sequences, 9599304698 bases, 5916504760 kmers +read 59500000 sequences, 9611572956 bases, 5922573018 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764283636241049811.minimizers.17.bin'... +read 59568965 sequences, 9620061299 bases, 5926785469 kmers +num_kmers 5926785469 +num_super_kmers 507530068 +num_pieces 59568966 (+1.2463 [bits/kmer]) +=== step 1: 'parse_file' 1302.48 [sec] (219.761 [ns/kmer]) + == files to merge = 18 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 450000000 +num_written_tuples = 500000000 +num_written_tuples = 507530068 +num_minimizers 296127983 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 32.7983 [sec] (5.5339 [ns/kmer]) +bits_per_offset = ceil(log2(9620061362)) = 34 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764284972107626866.bucket_pairs.0.bin'... +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764284972107626866.bucket_pairs.1.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764284972107626866.bucket_pairs.2.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764284972107626866.bucket_pairs.3.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764284972107626866.bucket_pairs.4.bin'... +num_singletons 209712929/296127983 (70.8183%) + == files to merge = 5 +num_written_pairs = 50000000 +num_written_pairs = 86415054 +=== step 3: 'build_index' 105.434 [sec] (17.7895 [ns/kmer]) +max_num_super_kmers_in_bucket 337356 +log2_max_num_super_kmers_in_bucket 19 +num_buckets_in_skew_index 171255/296127983 (0.0578314%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 91917727 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 81239918 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 73956430 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 64989870 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 53994155 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 44712864 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 337356: 118483803 +num_kmers_in_skew_index 529294767 (8.93055%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 91917727 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 91917727 keys; bits/key = 2.6615 + built positions[0] for 91917727 keys; bits/key = 7 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 81239918 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[1] for 81239918 keys; bits/key = 2.69372 + built positions[1] for 81239918 keys; bits/key = 8 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 73956430 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 73956430 keys; bits/key = 2.63856 + built positions[2] for 73956430 keys; bits/key = 9 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 64989870 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 64989870 keys; bits/key = 2.63839 + built positions[3] for 64989870 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 53994155 + building MPHF with 64 threads and 256 partitions... + built mphs[4] for 53994155 keys; bits/key = 2.68622 + built positions[4] for 53994155 keys; bits/key = 11 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 44712864 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 44712864 keys; bits/key = 2.73542 + built positions[5] for 44712864 keys; bits/key = 12 + lower 4096; upper 337356; num_bits_per_pos 19; keys_in_partition.size() 118483803 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[6] for 118483803 keys; bits/key = 2.59702 + built positions[6] for 118483803 keys; bits/key = 19 +num_bits_for_skew_index 7395671488(1.24784 [bits/kmer]) +=== step 4: 'build_skew_index' 112.318 [sec] (18.9508 [ns/kmer]) +=== total_time 1553.03 [sec] (262.035 [ns/kmer]) +total index size: 5749931032 [B] -- 5749.93 [MB] +SPACE BREAKDOWN: + minimizers: 0.135866 [bits/kmer] (2.71925 [bits/key]) -- 1.75056% + pieces: 0.105874 [bits/kmer] -- 1.36413% + num_super_kmers_before_bucket: 0.113871 [bits/kmer] -- 1.46717% + offsets: 2.91153 [bits/kmer] -- 37.5135% + strings: 3.2463 [bits/kmer] -- 41.8269% + skew_index: 1.24784 [bits/kmer] -- 16.0777% + weights: 2.48364e-07 [bits/kmer] -- 3.20004e-06% + weight_interval_values: 4.31937e-08 [bits/kmer] + weight_interval_lengths: 1.61977e-07 [bits/kmer] + weight_dictionary: 4.31937e-08 [bits/kmer] + -------------- + total: 7.76128 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 70.8183% +buckets with 2 super_kmers = 19.3879% +buckets with 3 super_kmers = 5.73771% +buckets with 4 super_kmers = 1.88619% +buckets with 5 super_kmers = 0.729827% +buckets with 6 super_kmers = 0.359355% +buckets with 7 super_kmers = 0.214283% +buckets with 8 super_kmers = 0.144866% +buckets with 9 super_kmers = 0.104897% +buckets with 10 super_kmers = 0.079962% +buckets with 11 super_kmers = 0.0625409% +buckets with 12 super_kmers = 0.0505312% +buckets with 13 super_kmers = 0.0411201% +buckets with 14 super_kmers = 0.0341018% +buckets with 15 super_kmers = 0.0289851% +buckets with 16 super_kmers = 0.0246778% +max_num_super_kmers_in_bucket 337356 +2025-11-28 00:13:10: saving data structure to disk... 
+2025-11-28 00:13:13: DONE diff --git a/benchmarks/results-27-11-25-v3/k63/canon-build.time.log b/benchmarks/results-27-11-25-v3/k63/canon-build.time.log new file mode 100644 index 0000000..a8632c5 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/canon-build.time.log @@ -0,0 +1,138 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/cod.k63.canon.sshash --canonical-parsing" + User time (seconds): 163.56 + System time (seconds): 2.97 + Percent of CPU this job got: 116% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:22.57 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1470924 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 855740 + Voluntary context switches: 784 + Involuntary context switches: 1258 + Swaps: 0 + File system inputs: 0 + File system outputs: 4177296 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/kestrel.k63.canon.sshash --canonical-parsing" + User time (seconds): 305.10 + System time (seconds): 5.21 + Percent of CPU this job got: 105% + Elapsed (wall clock) time (h:mm:ss or m:ss): 4:55.17 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 3198680 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1340855 + 
Voluntary context switches: 303 + Involuntary context switches: 1996 + Swaps: 0 + File system inputs: 0 + File system outputs: 8681744 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/human.k63.canon.sshash --canonical-parsing" + User time (seconds): 1148.22 + System time (seconds): 11.59 + Percent of CPU this job got: 159% + Elapsed (wall clock) time (h:mm:ss or m:ss): 12:06.11 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 7613392 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2855970 + Voluntary context switches: 409 + Involuntary context switches: 5671 + Swaps: 0 + File system inputs: 0 + File system outputs: 21390512 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/ncbi-virus.k63.canon.sshash --canonical-parsing" + User time (seconds): 107.35 + System time (seconds): 1.87 + Percent of CPU this job got: 103% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:45.21 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1507376 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 597495 + Voluntary context 
switches: 279 + Involuntary context switches: 742 + Swaps: 0 + File system inputs: 0 + File system outputs: 2184144 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/se.k63.canon.sshash --canonical-parsing" + User time (seconds): 872.27 + System time (seconds): 9.30 + Percent of CPU this job got: 228% + Elapsed (wall clock) time (h:mm:ss or m:ss): 6:26.35 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6451140 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 3222933 + Voluntary context switches: 410 + Involuntary context switches: 3692 + Swaps: 0 + File system inputs: 0 + File system outputs: 14468456 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/hprc.k63.canon.sshash --canonical-parsing" + User time (seconds): 3760.19 + System time (seconds): 34.09 + Percent of CPU this job got: 243% + Elapsed (wall clock) time (h:mm:ss or m:ss): 25:57.25 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 16835436 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 9993840 + Voluntary context switches: 513 + Involuntary context 
switches: 13450 + Swaps: 0 + File system inputs: 0 + File system outputs: 59524008 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-27-11-25-v3/k63/canon-streaming-queries-high-hit.log b/benchmarks/results-27-11-25-v3/k63/canon-streaming-queries-high-hit.log new file mode 100644 index 0000000..3b8957a --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/canon-streaming-queries-high-hit.log @@ -0,0 +1,48 @@ +2025-11-28 16:36:07: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-28 16:36:29: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 67275966 (68.6683%) +num_searches = 35527313/67275966 (52.8083%) +num_extensions = 31748653/67275966 (47.1917%) +elapsed = 22359.6 millisec / 22.3596 sec / 0.37266 min / 228.223 ns/kmer +2025-11-28 16:36:29: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-28 16:37:53: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 293470517 (63.6066%) +num_searches = 153492600/293470517 (52.3026%) +num_extensions = 139977917/293470517 (47.6974%) +elapsed = 83262.8 millisec / 83.2628 sec / 1.38771 min / 180.463 ns/kmer +2025-11-28 16:37:53: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 16:40:51: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 406529529 (85.0803%) +num_searches = 222074093/406529529 (54.6268%) +num_extensions = 184455436/406529529 (45.3732%) +elapsed = 177318 millisec / 177.318 sec / 2.9553 min / 371.098 ns/kmer +2025-11-28 16:40:51: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... 
+2025-11-28 16:40:53: DONE +==== query report: +num_kmers = 10330949 +num_positive_kmers = 10230224 (99.025%) +num_searches = 5533278/10230224 (54.0876%) +num_extensions = 4696946/10230224 (45.9124%) +elapsed = 2363.11 millisec / 2.36311 sec / 0.0393851 min / 228.741 ns/kmer +2025-11-28 16:40:54: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-28 16:55:59: DONE +==== query report: +num_kmers = 541466405 +num_positive_kmers = 507202856 (93.6721%) +num_searches = 321345723/507202856 (63.3564%) +num_extensions = 185857133/507202856 (36.6436%) +elapsed = 905024 millisec / 905.024 sec / 15.0837 min / 1671.43 ns/kmer +2025-11-28 16:56:01: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 16:59:25: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 434532302 (90.9409%) +num_searches = 241384562/434532302 (55.5504%) +num_extensions = 193147740/434532302 (44.4496%) +elapsed = 203222 millisec / 203.222 sec / 3.38703 min / 425.312 ns/kmer diff --git a/benchmarks/results-27-11-25-v3/k63/regular-bench.log b/benchmarks/results-27-11-25-v3/k63/regular-bench.log new file mode 100644 index 0000000..ac63f79 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/regular-bench.log @@ -0,0 +1,108 @@ +avg_nanosec_per_positive_lookup 1176.24 +avg_nanosec_per_negative_lookup 1295.01 +avg_nanosec_per_positive_lookup_advanced 1170.72 +avg_nanosec_per_negative_lookup_advanced 1282.84 +avg_nanosec_per_access 309.049 +iterator: avg_nanosec_per_kmer 13.8023 +avg_nanosec_per_positive_lookup 1177.06 +avg_nanosec_per_negative_lookup 1317.47 +avg_nanosec_per_positive_lookup_advanced 1184 +avg_nanosec_per_negative_lookup_advanced 1286.57 +avg_nanosec_per_access 307.443 +iterator: avg_nanosec_per_kmer 14.1317 +avg_nanosec_per_positive_lookup 1183.02 +avg_nanosec_per_negative_lookup 1320.87 +avg_nanosec_per_positive_lookup_advanced 1183.52 +avg_nanosec_per_negative_lookup_advanced 1255.32 
+avg_nanosec_per_access 306.267 +iterator: avg_nanosec_per_kmer 13.938 +avg_nanosec_per_positive_lookup 1000.99 +avg_nanosec_per_negative_lookup 1402.82 +avg_nanosec_per_positive_lookup_advanced 1015.78 +avg_nanosec_per_negative_lookup_advanced 1334.26 +avg_nanosec_per_access 268.359 +iterator: avg_nanosec_per_kmer 13.7447 +avg_nanosec_per_positive_lookup 997.474 +avg_nanosec_per_negative_lookup 1377.26 +avg_nanosec_per_positive_lookup_advanced 1028.81 +avg_nanosec_per_negative_lookup_advanced 1351.41 +avg_nanosec_per_access 270.65 +iterator: avg_nanosec_per_kmer 13.7468 +avg_nanosec_per_positive_lookup 1024.66 +avg_nanosec_per_negative_lookup 1378.64 +avg_nanosec_per_positive_lookup_advanced 1024.4 +avg_nanosec_per_negative_lookup_advanced 1358.3 +avg_nanosec_per_access 266.032 +iterator: avg_nanosec_per_kmer 14.2729 +avg_nanosec_per_positive_lookup 1554.36 +avg_nanosec_per_negative_lookup 1703.44 +avg_nanosec_per_positive_lookup_advanced 1523.25 +avg_nanosec_per_negative_lookup_advanced 1673.06 +avg_nanosec_per_access 355.779 +iterator: avg_nanosec_per_kmer 13.8737 +avg_nanosec_per_positive_lookup 1524.32 +avg_nanosec_per_negative_lookup 1722.17 +avg_nanosec_per_positive_lookup_advanced 1526.09 +avg_nanosec_per_negative_lookup_advanced 1685.69 +avg_nanosec_per_access 353.206 +iterator: avg_nanosec_per_kmer 13.8786 +avg_nanosec_per_positive_lookup 1541.9 +avg_nanosec_per_negative_lookup 1723.01 +avg_nanosec_per_positive_lookup_advanced 1533.71 +avg_nanosec_per_negative_lookup_advanced 1699.78 +avg_nanosec_per_access 358.16 +iterator: avg_nanosec_per_kmer 14.1515 +avg_nanosec_per_positive_lookup 938.321 +avg_nanosec_per_negative_lookup 1267.15 +avg_nanosec_per_positive_lookup_advanced 937.563 +avg_nanosec_per_negative_lookup_advanced 1251.9 +avg_nanosec_per_access 304.162 +iterator: avg_nanosec_per_kmer 14.1522 +avg_nanosec_per_positive_lookup 939.518 +avg_nanosec_per_negative_lookup 1280.11 +avg_nanosec_per_positive_lookup_advanced 943.774 
+avg_nanosec_per_negative_lookup_advanced 1239.15 +avg_nanosec_per_access 300.735 +iterator: avg_nanosec_per_kmer 14.1745 +avg_nanosec_per_positive_lookup 942.911 +avg_nanosec_per_negative_lookup 1263.11 +avg_nanosec_per_positive_lookup_advanced 933 +avg_nanosec_per_negative_lookup_advanced 1248.72 +avg_nanosec_per_access 300.821 +iterator: avg_nanosec_per_kmer 13.8817 +avg_nanosec_per_positive_lookup 2832.14 +avg_nanosec_per_negative_lookup 2065.61 +avg_nanosec_per_positive_lookup_advanced 2812.98 +avg_nanosec_per_negative_lookup_advanced 2043.82 +avg_nanosec_per_access 435.503 +iterator: avg_nanosec_per_kmer 14.261 +avg_nanosec_per_positive_lookup 2828.23 +avg_nanosec_per_negative_lookup 2070.98 +avg_nanosec_per_positive_lookup_advanced 2829.25 +avg_nanosec_per_negative_lookup_advanced 2030.81 +avg_nanosec_per_access 432.514 +iterator: avg_nanosec_per_kmer 14.214 +avg_nanosec_per_positive_lookup 2853.67 +avg_nanosec_per_negative_lookup 2091.92 +avg_nanosec_per_positive_lookup_advanced 2860.96 +avg_nanosec_per_negative_lookup_advanced 2050.07 +avg_nanosec_per_access 435.452 +iterator: avg_nanosec_per_kmer 14.1743 +avg_nanosec_per_positive_lookup 2587.23 +avg_nanosec_per_negative_lookup 2629.75 +avg_nanosec_per_positive_lookup_advanced 2543.11 +avg_nanosec_per_negative_lookup_advanced 2579.34 +avg_nanosec_per_access 683.668 +iterator: avg_nanosec_per_kmer 14.493 +avg_nanosec_per_positive_lookup 2603.43 +avg_nanosec_per_negative_lookup 2622.83 +avg_nanosec_per_positive_lookup_advanced 2576.13 +avg_nanosec_per_negative_lookup_advanced 2602.66 +avg_nanosec_per_access 682.111 +iterator: avg_nanosec_per_kmer 14.1579 +avg_nanosec_per_positive_lookup 2565.77 +avg_nanosec_per_negative_lookup 2608.12 +avg_nanosec_per_positive_lookup_advanced 2554.08 +avg_nanosec_per_negative_lookup_advanced 2588.21 +avg_nanosec_per_access 680.79 +iterator: avg_nanosec_per_kmer 14.1209 diff --git a/benchmarks/results-27-11-25-v3/k63/regular-build.log 
b/benchmarks/results-27-11-25-v3/k63/regular-build.log new file mode 100644 index 0000000..96d31ef --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/regular-build.log @@ -0,0 +1,1428 @@ +k = 63, m = 24, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 61951098 bases, 55751160 kmers +read 200000 sequences, 122972836 bases, 110572898 kmers +read 300000 sequences, 183599791 bases, 164999853 kmers +read 400000 sequences, 245140055 bases, 220340117 kmers +read 500000 sequences, 306871655 bases, 275871717 kmers +read 600000 sequences, 368254748 bases, 331054810 kmers +read 700000 sequences, 430272349 bases, 386872411 kmers +read 800000 sequences, 496894402 bases, 447294464 kmers +read 900000 sequences, 580548812 bases, 524748874 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764279985214394756.minimizers.0.bin'... +read 954555 sequences, 615768068 bases, 556585658 kmers +num_kmers 556585658 +num_super_kmers 28019724 +num_pieces 954556 (+0.212663 [bits/kmer]) +=== step 1: 'parse_file' 82.1284 [sec] (147.557 [ns/kmer]) +num_minimizers 24561600 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 0.900406 [sec] (1.61773 [ns/kmer]) +bits_per_offset = ceil(log2(615768131)) = 30 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280068275170184.bucket_pairs.0.bin'... +num_singletons 23591216/24561600 (96.0492%) +=== step 3: 'build_index' 3.35811 [sec] (6.03341 [ns/kmer]) +max_num_super_kmers_in_bucket 19521 +log2_max_num_super_kmers_in_bucket 15 +num_buckets_in_skew_index 3503/24561600 (0.0142621%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 3715679 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 2564133 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 1403486 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 1115400 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 1042641 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 882051 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 19521: 1991148 +num_kmers_in_skew_index 12714538 (2.28438%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 3715679 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 3715679 keys; bits/key = 3.23769 + built positions[0] for 3715679 keys; bits/key = 7.0001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 2564133 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 2564133 keys; bits/key = 3.43225 + built positions[1] for 2564133 keys; bits/key = 8.00013 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 1403486 + building MPHF with 64 threads and 70 partitions... + built mphs[2] for 1403486 keys; bits/key = 3.07059 + built positions[2] for 1403486 keys; bits/key = 9.00026 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 1115400 + building MPHF with 64 threads and 55 partitions... + built mphs[3] for 1115400 keys; bits/key = 3.01221 + built positions[3] for 1115400 keys; bits/key = 10.0003 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 1042641 + building MPHF with 64 threads and 52 partitions... 
+ built mphs[4] for 1042641 keys; bits/key = 3.00144 + built positions[4] for 1042641 keys; bits/key = 11.0003 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 882051 + building MPHF with 64 threads and 44 partitions... + built mphs[5] for 882051 keys; bits/key = 2.98863 + built positions[5] for 882051 keys; bits/key = 12.0004 + lower 4096; upper 19521; num_bits_per_pos 15; keys_in_partition.size() 1991148 + building MPHF with 64 threads and 99 partitions... + built mphs[6] for 1991148 keys; bits/key = 3.13438 + built positions[6] for 1991148 keys; bits/key = 15.0002 +num_bits_for_skew_index 162738608(0.292387 [bits/kmer]) +=== step 4: 'build_skew_index' 1.17985 [sec] (2.1198 [ns/kmer]) +=== total_time 87.5667 [sec] (157.328 [ns/kmer]) +total index size: 294488430 [B] -- 294.488 [MB] +SPACE BREAKDOWN: + minimizers: 0.120807 [bits/kmer] (2.73757 [bits/key]) -- 2.85407% + pieces: 0.0214933 [bits/kmer] -- 0.507781% + num_super_kmers_before_bucket: 0.0751667 [bits/kmer] -- 1.77582% + offsets: 1.51027 [bits/kmer] -- 35.6802% + strings: 2.21266 [bits/kmer] -- 52.2744% + skew_index: 0.292387 [bits/kmer] -- 6.90768% + weights: 2.6447e-06 [bits/kmer] -- 6.24812e-05% + weight_interval_values: 4.59947e-07 [bits/kmer] + weight_interval_lengths: 1.7248e-06 [bits/kmer] + weight_dictionary: 4.59947e-07 [bits/kmer] + -------------- + total: 4.23279 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 96.0492% +buckets with 2 super_kmers = 2.2644% +buckets with 3 super_kmers = 0.694128% +buckets with 4 super_kmers = 0.311437% +buckets with 5 super_kmers = 0.170913% +buckets with 6 super_kmers = 0.106434% +buckets with 7 super_kmers = 0.0716566% +buckets with 8 super_kmers = 0.0519307% +buckets with 9 super_kmers = 0.0388045% +buckets with 10 super_kmers = 0.0306698% +buckets with 11 super_kmers = 0.0247337% +buckets with 12 super_kmers = 0.0199865% +buckets with 13 super_kmers = 0.0164403% +buckets with 14 super_kmers = 0.0137857% 
+buckets with 15 super_kmers = 0.0117297% +buckets with 16 super_kmers = 0.0100197% +max_num_super_kmers_in_bucket 19521 +2025-11-27 22:47:52: saving data structure to disk... +2025-11-27 22:47:53: DONE +k = 63, m = 24, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280073158799178.minimizers.0.bin'... +read 100000 sequences, 726199521 bases, 719999583 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280073158799178.minimizers.1.bin'... +read 155784 sequences, 1164909275 bases, 1155250667 kmers +num_kmers 1155250667 +num_super_kmers 56506979 +num_pieces 155785 (+0.0167213 [bits/kmer]) +=== step 1: 'parse_file' 162.557 [sec] (140.712 [ns/kmer]) + == files to merge = 2 +num_written_tuples = 50000000 +num_written_tuples = 56506979 +num_minimizers 55965215 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 3.41297 [sec] (2.95432 [ns/kmer]) +bits_per_offset = ceil(log2(1164909338)) = 31 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280239191429978.bucket_pairs.0.bin'... +num_singletons 55755989/55965215 (99.6261%) +=== step 3: 'build_index' 9.03346 [sec] (7.81948 [ns/kmer]) +max_num_super_kmers_in_bucket 813 +log2_max_num_super_kmers_in_bucket 10 +num_buckets_in_skew_index 500/55965215 (0.000893412%) +num_partitions 4 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 589695 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 468254 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 205576 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 813: 80864 +num_kmers_in_skew_index 1344389 (0.116372%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 589695 + building MPHF with 64 threads and 29 partitions... + built mphs[0] for 589695 keys; bits/key = 2.963 + built positions[0] for 589695 keys; bits/key = 7.00055 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 468254 + building MPHF with 64 threads and 23 partitions... + built mphs[1] for 468254 keys; bits/key = 2.95604 + built positions[1] for 468254 keys; bits/key = 8.00072 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 205576 + building MPHF with 64 threads and 10 partitions... + built mphs[2] for 205576 keys; bits/key = 2.92423 + built positions[2] for 205576 keys; bits/key = 9.00183 + lower 512; upper 813; num_bits_per_pos 10; keys_in_partition.size() 80864 + building MPHF with 64 threads and 4 partitions... 
+ built mphs[3] for 80864 keys; bits/key = 2.94796 + built positions[3] for 80864 keys; bits/key = 10.004 +num_bits_for_skew_index 14505248(0.0125559 [bits/kmer]) +=== step 4: 'build_skew_index' 0.345694 [sec] (0.299237 [ns/kmer]) +=== total_time 175.35 [sec] (151.785 [ns/kmer]) +total index size: 542513778 [B] -- 542.514 [MB] +SPACE BREAKDOWN: + minimizers: 0.13289 [bits/kmer] (2.74315 [bits/key]) -- 3.53726% + pieces: 0.00221454 [bits/kmer] -- 0.0589467% + num_super_kmers_before_bucket: 0.076164 [bits/kmer] -- 2.02733% + offsets: 1.51631 [bits/kmer] -- 40.3611% + strings: 2.01672 [bits/kmer] -- 53.6811% + skew_index: 0.0125559 [bits/kmer] -- 0.334214% + weights: 1.27418e-06 [bits/kmer] -- 3.39162e-05% + weight_interval_values: 2.21597e-07 [bits/kmer] + weight_interval_lengths: 8.30988e-07 [bits/kmer] + weight_dictionary: 2.21597e-07 [bits/kmer] + -------------- + total: 3.75686 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 99.6261% +buckets with 2 super_kmers = 0.251186% +buckets with 3 super_kmers = 0.0533975% +buckets with 4 super_kmers = 0.0222996% +buckets with 5 super_kmers = 0.0121754% +buckets with 6 super_kmers = 0.00769585% +buckets with 7 super_kmers = 0.00507994% +buckets with 8 super_kmers = 0.0035933% +buckets with 9 super_kmers = 0.00276243% +buckets with 10 super_kmers = 0.00200839% +buckets with 11 super_kmers = 0.00156704% +buckets with 12 super_kmers = 0.00133118% +buckets with 13 super_kmers = 0.00111498% +buckets with 14 super_kmers = 0.000980966% +buckets with 15 super_kmers = 0.000816579% +buckets with 16 super_kmers = 0.00071473% +max_num_super_kmers_in_bucket 813 +2025-11-27 22:50:48: saving data structure to disk... +2025-11-27 22:50:48: DONE +k = 63, m = 25, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 156072595 bases, 149872657 kmers +read 200000 sequences, 358422338 bases, 346022400 kmers +read 300000 sequences, 483978517 bases, 465378579 kmers +read 400000 sequences, 579661118 bases, 554861180 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280248970220538.minimizers.0.bin'... +read 500000 sequences, 676694662 bases, 645694724 kmers +read 600000 sequences, 771042496 bases, 733842558 kmers +read 700000 sequences, 867361949 bases, 823962011 kmers +read 800000 sequences, 963947999 bases, 914348061 kmers +read 900000 sequences, 1061593876 bases, 1005793938 kmers +read 1000000 sequences, 1159508767 bases, 1097508829 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280248970220538.minimizers.1.bin'... +read 1100000 sequences, 1258471359 bases, 1190271421 kmers +read 1200000 sequences, 1356700107 bases, 1282300169 kmers +read 1300000 sequences, 1455266386 bases, 1374666448 kmers +read 1400000 sequences, 1554765716 bases, 1467965778 kmers +read 1500000 sequences, 1655325517 bases, 1562325579 kmers +read 1600000 sequences, 1756677712 bases, 1657477774 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280248970220538.minimizers.2.bin'... +read 1700000 sequences, 1857972502 bases, 1752572564 kmers +read 1800000 sequences, 1959725889 bases, 1848125951 kmers +read 1900000 sequences, 2064608705 bases, 1946808767 kmers +read 2000000 sequences, 2171598469 bases, 2047598531 kmers +read 2100000 sequences, 2280349838 bases, 2150149900 kmers +read 2200000 sequences, 2390837388 bases, 2254437450 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280248970220538.minimizers.3.bin'... 
+read 2300000 sequences, 2504101994 bases, 2361502056 kmers +read 2400000 sequences, 2621983258 bases, 2473183320 kmers +read 2500000 sequences, 2745887962 bases, 2590888024 kmers +read 2600000 sequences, 2875578557 bases, 2714378619 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280248970220538.minimizers.4.bin'... +read 2642917 sequences, 2935176947 bases, 2771316093 kmers +num_kmers 2771316093 +num_super_kmers 141095110 +num_pieces 2642918 (+0.118255 [bits/kmer]) +=== step 1: 'parse_file' 385.783 [sec] (139.206 [ns/kmer]) + == files to merge = 5 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 141095110 +num_minimizers 123318771 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 8.58189 [sec] (3.09669 [ns/kmer]) +bits_per_offset = ceil(log2(2935177010)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280643492926440.bucket_pairs.0.bin'... +num_singletons 120176252/123318771 (97.4517%) +=== step 3: 'build_index' 27.4853 [sec] (9.91779 [ns/kmer]) +max_num_super_kmers_in_bucket 41894 +log2_max_num_super_kmers_in_bucket 16 +num_buckets_in_skew_index 28626/123318771 (0.023213%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 25290718 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 22959952 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 20403616 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 19208760 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 17090734 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 12440370 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 41894: 24078745 +num_kmers_in_skew_index 141472895 (5.1049%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 25290718 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 25290718 keys; bits/key = 2.78609 + built positions[0] for 25290718 keys; bits/key = 7.00001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 22959952 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 22959952 keys; bits/key = 2.81272 + built positions[1] for 22959952 keys; bits/key = 8.00001 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 20403616 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 20403616 keys; bits/key = 2.848 + built positions[2] for 20403616 keys; bits/key = 9.00002 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 19208760 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 19208760 keys; bits/key = 2.8638 + built positions[3] for 19208760 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 17090734 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[4] for 17090734 keys; bits/key = 2.90286 + built positions[4] for 17090734 keys; bits/key = 11 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 12440370 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 12440370 keys; bits/key = 2.99662 + built positions[5] for 12440370 keys; bits/key = 12 + lower 4096; upper 41894; num_bits_per_pos 16; keys_in_partition.size() 24078745 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 24078745 keys; bits/key = 2.79854 + built positions[6] for 24078745 keys; bits/key = 16 +num_bits_for_skew_index 1861418064(0.671673 [bits/kmer]) +=== step 4: 'build_skew_index' 17.6011 [sec] (6.35118 [ns/kmer]) +=== total_time 439.451 [sec] (158.571 [ns/kmer]) +total index size: 1602434288 [B] -- 1602.43 [MB] +SPACE BREAKDOWN: + minimizers: 0.11805 [bits/kmer] (2.65292 [bits/key]) -- 2.55202% + pieces: 0.0126433 [bits/kmer] -- 0.273323% + num_super_kmers_before_bucket: 0.0759434 [bits/kmer] -- 1.64174% + offsets: 1.62921 [bits/kmer] -- 35.2202% + strings: 2.11825 [bits/kmer] -- 45.7925% + skew_index: 0.671673 [bits/kmer] -- 14.5202% + weights: 5.31156e-07 [bits/kmer] -- 1.14825e-05% + weight_interval_values: 9.23749e-08 [bits/kmer] + weight_interval_lengths: 3.46406e-07 [bits/kmer] + weight_dictionary: 9.23749e-08 [bits/kmer] + -------------- + total: 4.62577 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 97.4517% +buckets with 2 super_kmers = 1.47013% +buckets with 3 super_kmers = 0.404837% +buckets with 4 super_kmers = 0.185024% +buckets with 5 super_kmers = 0.10586% +buckets with 6 super_kmers = 0.0683651% +buckets with 7 super_kmers = 0.0481184% +buckets with 8 super_kmers = 0.0354861% +buckets with 9 super_kmers = 0.0274881% +buckets with 10 super_kmers = 0.0217282% +buckets with 11 super_kmers = 0.0175805% +buckets with 12 super_kmers = 0.0146612% +buckets with 13 super_kmers = 0.0123679% +buckets with 14 super_kmers = 0.0106529% 
+buckets with 15 super_kmers = 0.00919244% +buckets with 16 super_kmers = 0.00788769% +max_num_super_kmers_in_bucket 41894 +2025-11-27 22:58:08: saving data structure to disk... +2025-11-27 22:58:09: DONE +k = 63, m = 23, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 48527990 bases, 42328052 kmers +read 200000 sequences, 101938174 bases, 89538236 kmers +read 300000 sequences, 171738001 bases, 153138063 kmers +read 400000 sequences, 279668649 bases, 254868711 kmers +read 500000 sequences, 322024377 bases, 291024439 kmers +read 600000 sequences, 367040069 bases, 329840131 kmers +read 700000 sequences, 412232472 bases, 368832534 kmers +read 800000 sequences, 457488794 bases, 407888856 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280689608027502.minimizers.0.bin'... +read 800231 sequences, 462130202 bases, 412515880 kmers +num_kmers 412515880 +num_super_kmers 20404397 +num_pieces 800232 (+0.240545 [bits/kmer]) +=== step 1: 'parse_file' 60.2141 [sec] (145.968 [ns/kmer]) +num_minimizers 18537049 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 0.674712 [sec] (1.6356 [ns/kmer]) +bits_per_offset = ceil(log2(462130265)) = 29 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280750526933329.bucket_pairs.0.bin'... +num_singletons 17320176/18537049 (93.4355%) +=== step 3: 'build_index' 2.42641 [sec] (5.88197 [ns/kmer]) +max_num_super_kmers_in_bucket 382 +log2_max_num_super_kmers_in_bucket 9 +num_buckets_in_skew_index 12/18537049 (6.47352e-05%) +num_partitions 3 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 19499 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 4829 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 382: 5558 +num_kmers_in_skew_index 29886 (0.00724481%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 19499 + building MPHF with 64 threads and 1 partitions... + built mphs[0] for 19499 keys; bits/key = 2.78578 + built positions[0] for 19499 keys; bits/key = 7.01739 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 4829 + building MPHF with 64 threads and 1 partitions... + built mphs[1] for 4829 keys; bits/key = 3.70429 + built positions[1] for 4829 keys; bits/key = 8.07124 + lower 256; upper 382; num_bits_per_pos 9; keys_in_partition.size() 5558 + building MPHF with 64 threads and 1 partitions... + built mphs[2] for 5558 keys; bits/key = 3.56963 + built positions[2] for 5558 keys; bits/key = 9.06225 +num_bits_for_skew_index 318416(0.000771888 [bits/kmer]) +=== step 4: 'build_skew_index' 0.104181 [sec] (0.25255 [ns/kmer]) +=== total_time 63.4194 [sec] (153.738 [ns/kmer]) +total index size: 200950900 [B] -- 200.951 [MB] +SPACE BREAKDOWN: + minimizers: 0.122667 [bits/kmer] (2.72979 [bits/key]) -- 3.14767% + pieces: 0.0239116 [bits/kmer] -- 0.613578% + num_super_kmers_before_bucket: 0.0747427 [bits/kmer] -- 1.91792% + offsets: 1.43444 [bits/kmer] -- 36.808% + strings: 2.24055 [bits/kmer] -- 57.4929% + skew_index: 0.000771888 [bits/kmer] -- 0.0198068% + weights: 3.56835e-06 [bits/kmer] -- 9.15647e-05% + weight_interval_values: 6.20582e-07 [bits/kmer] + weight_interval_lengths: 2.32718e-06 [bits/kmer] + weight_dictionary: 6.20582e-07 [bits/kmer] + -------------- + total: 3.89708 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 93.4355% +buckets with 2 super_kmers = 4.75754% +buckets with 3 super_kmers = 1.08068% +buckets with 4 super_kmers = 
0.365285% +buckets with 5 super_kmers = 0.156853% +buckets with 6 super_kmers = 0.0795488% +buckets with 7 super_kmers = 0.0437664% +buckets with 8 super_kmers = 0.0260397% +buckets with 9 super_kmers = 0.0170469% +buckets with 10 super_kmers = 0.0111776% +buckets with 11 super_kmers = 0.00762797% +buckets with 12 super_kmers = 0.0052975% +buckets with 13 super_kmers = 0.00350649% +buckets with 14 super_kmers = 0.00244915% +buckets with 15 super_kmers = 0.00197442% +buckets with 16 super_kmers = 0.00134865% +max_num_super_kmers_in_bucket 382 +2025-11-27 22:59:13: saving data structure to disk... +2025-11-27 22:59:13: DONE +k = 63, m = 31, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz'... +m_buffer_size 29411764 +read 100000 sequences, 29016582 bases, 22816644 kmers +read 200000 sequences, 59512666 bases, 47112728 kmers +read 300000 sequences, 93672597 bases, 75072659 kmers +read 400000 sequences, 116504475 bases, 91704537 kmers +read 500000 sequences, 129640659 bases, 98640721 kmers +read 600000 sequences, 142888268 bases, 105688330 kmers +read 700000 sequences, 156121434 bases, 112721496 kmers +read 800000 sequences, 169296056 bases, 119696118 kmers +read 900000 sequences, 182473552 bases, 126673614 kmers +read 1000000 sequences, 195596530 bases, 133596592 kmers +read 1100000 sequences, 208767758 bases, 140567820 kmers +read 1200000 sequences, 222206136 bases, 147806198 kmers +read 1300000 sequences, 235298907 bases, 154698969 kmers +read 1400000 sequences, 248482551 bases, 161682613 kmers +read 1500000 sequences, 261588927 bases, 168588989 kmers +read 1600000 sequences, 275055123 bases, 175855185 kmers +read 1700000 sequences, 288035774 bases, 182635836 kmers +read 1800000 sequences, 301349714 bases, 189749776 kmers +read 1900000 sequences, 314875277 bases, 197075339 kmers +read 2000000 sequences, 328155322 bases, 204155384 kmers +read 2100000 sequences, 341368561 bases, 
211168623 kmers +read 2200000 sequences, 354712477 bases, 218312539 kmers +read 2300000 sequences, 368071956 bases, 225472018 kmers +read 2400000 sequences, 381369058 bases, 232569120 kmers +read 2500000 sequences, 395076617 bases, 240076679 kmers +read 2600000 sequences, 408571137 bases, 247371199 kmers +read 2700000 sequences, 421770863 bases, 254370925 kmers +read 2800000 sequences, 434931009 bases, 261331071 kmers +read 2900000 sequences, 448298966 bases, 268499028 kmers +read 3000000 sequences, 461539188 bases, 275539250 kmers +read 3100000 sequences, 474884720 bases, 282684782 kmers +read 3200000 sequences, 488437755 bases, 290037817 kmers +read 3300000 sequences, 501681431 bases, 297081493 kmers +read 3400000 sequences, 514981599 bases, 304181661 kmers +read 3500000 sequences, 528308841 bases, 311308903 kmers +read 3600000 sequences, 541636881 bases, 318436943 kmers +read 3700000 sequences, 555162303 bases, 325762365 kmers +read 3800000 sequences, 568696721 bases, 333096783 kmers +read 3900000 sequences, 581917274 bases, 340117336 kmers +read 4000000 sequences, 595230685 bases, 347230747 kmers +read 4100000 sequences, 608987239 bases, 354787301 kmers +read 4200000 sequences, 622285126 bases, 361885188 kmers +read 4300000 sequences, 635647812 bases, 369047874 kmers +read 4400000 sequences, 648992570 bases, 376192632 kmers +read 4500000 sequences, 662513011 bases, 383513073 kmers +read 4600000 sequences, 675930610 bases, 390730672 kmers +read 4700000 sequences, 689630791 bases, 398230853 kmers +read 4800000 sequences, 702998442 bases, 405398504 kmers +read 4900000 sequences, 716508958 bases, 412709020 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280753237549641.minimizers.0.bin'... 
+read 5000000 sequences, 730213651 bases, 420213713 kmers +read 5100000 sequences, 743782088 bases, 427582150 kmers +read 5200000 sequences, 757644219 bases, 435244281 kmers +read 5300000 sequences, 771125055 bases, 442525117 kmers +read 5400000 sequences, 784690986 bases, 449891048 kmers +read 5500000 sequences, 798294730 bases, 457294792 kmers +read 5600000 sequences, 811868508 bases, 464668570 kmers +read 5700000 sequences, 825311135 bases, 471911197 kmers +read 5800000 sequences, 838965446 bases, 479365508 kmers +read 5900000 sequences, 852651285 bases, 486851347 kmers +read 6000000 sequences, 866286122 bases, 494286184 kmers +read 6100000 sequences, 879791056 bases, 501591118 kmers +read 6200000 sequences, 893568449 bases, 509168511 kmers +read 6300000 sequences, 907221007 bases, 516621069 kmers +read 6400000 sequences, 920850613 bases, 524050675 kmers +read 6500000 sequences, 934451411 bases, 531451473 kmers +read 6600000 sequences, 948058118 bases, 538858180 kmers +read 6700000 sequences, 961671957 bases, 546272019 kmers +read 6800000 sequences, 975396455 bases, 553796517 kmers +read 6900000 sequences, 989058845 bases, 561258907 kmers +read 7000000 sequences, 1002901429 bases, 568901491 kmers +read 7100000 sequences, 1016671629 bases, 576471691 kmers +read 7200000 sequences, 1030411544 bases, 584011606 kmers +read 7300000 sequences, 1044405065 bases, 591805127 kmers +read 7400000 sequences, 1058405891 bases, 599605953 kmers +read 7500000 sequences, 1072419133 bases, 607419195 kmers +read 7600000 sequences, 1086161468 bases, 614961530 kmers +read 7700000 sequences, 1100358314 bases, 622958376 kmers +read 7800000 sequences, 1114260927 bases, 630660989 kmers +read 7900000 sequences, 1128307710 bases, 638507772 kmers +read 8000000 sequences, 1142393895 bases, 646393957 kmers +read 8100000 sequences, 1156356962 bases, 654157024 kmers +read 8200000 sequences, 1170571834 bases, 662171896 kmers +read 8300000 sequences, 1184451092 bases, 669851154 kmers +read 8400000 
sequences, 1198299201 bases, 677499263 kmers +read 8500000 sequences, 1212442291 bases, 685442353 kmers +read 8600000 sequences, 1226404988 bases, 693205050 kmers +read 8700000 sequences, 1240702863 bases, 701302925 kmers +read 8800000 sequences, 1254656858 bases, 709056920 kmers +read 8900000 sequences, 1268621255 bases, 716821317 kmers +read 9000000 sequences, 1282878920 bases, 724878982 kmers +read 9100000 sequences, 1297350483 bases, 733150545 kmers +read 9200000 sequences, 1311515942 bases, 741116004 kmers +read 9300000 sequences, 1325689602 bases, 749089664 kmers +read 9400000 sequences, 1339681841 bases, 756881903 kmers +read 9500000 sequences, 1353794253 bases, 764794315 kmers +read 9600000 sequences, 1368052405 bases, 772852467 kmers +read 9700000 sequences, 1382480965 bases, 781081027 kmers +read 9800000 sequences, 1397029040 bases, 789429102 kmers +read 9900000 sequences, 1411261749 bases, 797461811 kmers +read 10000000 sequences, 1425636281 bases, 805636343 kmers +read 10100000 sequences, 1439978068 bases, 813778130 kmers +read 10200000 sequences, 1454565939 bases, 822166001 kmers +read 10300000 sequences, 1469139911 bases, 830539973 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280753237549641.minimizers.1.bin'... 
+read 10400000 sequences, 1483590146 bases, 838790208 kmers +read 10500000 sequences, 1497966602 bases, 846966664 kmers +read 10600000 sequences, 1512656597 bases, 855456659 kmers +read 10700000 sequences, 1527179829 bases, 863779891 kmers +read 10800000 sequences, 1541782704 bases, 872182766 kmers +read 10900000 sequences, 1556446136 bases, 880646198 kmers +read 11000000 sequences, 1571132909 bases, 889132971 kmers +read 11100000 sequences, 1585816988 bases, 897617050 kmers +read 11200000 sequences, 1600557306 bases, 906157368 kmers +read 11300000 sequences, 1615262515 bases, 914662577 kmers +read 11400000 sequences, 1629999559 bases, 923199621 kmers +read 11500000 sequences, 1644860500 bases, 931860562 kmers +read 11600000 sequences, 1659779470 bases, 940579532 kmers +read 11700000 sequences, 1674792249 bases, 949392311 kmers +read 11800000 sequences, 1689736650 bases, 958136712 kmers +read 11900000 sequences, 1704892540 bases, 967092602 kmers +read 12000000 sequences, 1720028604 bases, 976028666 kmers +read 12100000 sequences, 1735135339 bases, 984935401 kmers +read 12200000 sequences, 1750296958 bases, 993897020 kmers +read 12300000 sequences, 1765691210 bases, 1003091272 kmers +read 12400000 sequences, 1781168535 bases, 1012368597 kmers +read 12500000 sequences, 1796591631 bases, 1021591693 kmers +read 12600000 sequences, 1811900898 bases, 1030700960 kmers +read 12700000 sequences, 1827876385 bases, 1040476447 kmers +read 12800000 sequences, 1843393661 bases, 1049793723 kmers +read 12900000 sequences, 1858842530 bases, 1059042592 kmers +read 13000000 sequences, 1874571274 bases, 1068571336 kmers +read 13100000 sequences, 1890244064 bases, 1078044126 kmers +read 13200000 sequences, 1905959037 bases, 1087559099 kmers +read 13300000 sequences, 1922228213 bases, 1097628275 kmers +read 13400000 sequences, 1938519944 bases, 1107720006 kmers +read 13500000 sequences, 1954664776 bases, 1117664838 kmers +read 13600000 sequences, 1970696460 bases, 1127496522 kmers +read 
13700000 sequences, 1987164724 bases, 1137764786 kmers +read 13800000 sequences, 2003371237 bases, 1147771299 kmers +read 13900000 sequences, 2019649705 bases, 1157849767 kmers +read 14000000 sequences, 2036208623 bases, 1168208685 kmers +read 14100000 sequences, 2052985966 bases, 1178786028 kmers +read 14200000 sequences, 2069712096 bases, 1189312158 kmers +read 14300000 sequences, 2086760464 bases, 1200160526 kmers +read 14400000 sequences, 2103719550 bases, 1210919612 kmers +read 14500000 sequences, 2120768440 bases, 1221768502 kmers +read 14600000 sequences, 2137973451 bases, 1232773513 kmers +read 14700000 sequences, 2155333447 bases, 1243933509 kmers +read 14800000 sequences, 2173028894 bases, 1255428956 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280753237549641.minimizers.2.bin'... +read 14900000 sequences, 2190761817 bases, 1266961879 kmers +read 15000000 sequences, 2208992504 bases, 1278992566 kmers +read 15100000 sequences, 2226993777 bases, 1290793839 kmers +read 15200000 sequences, 2245240709 bases, 1302840771 kmers +read 15300000 sequences, 2264077653 bases, 1315477715 kmers +read 15400000 sequences, 2282789133 bases, 1327989195 kmers +read 15500000 sequences, 2301835592 bases, 1340835654 kmers +read 15600000 sequences, 2321337067 bases, 1354137129 kmers +read 15700000 sequences, 2341431050 bases, 1368031112 kmers +read 15800000 sequences, 2361547779 bases, 1381947841 kmers +read 15900000 sequences, 2382125643 bases, 1396325705 kmers +read 16000000 sequences, 2403497084 bases, 1411497146 kmers +read 16100000 sequences, 2424934249 bases, 1426734311 kmers +read 16200000 sequences, 2447209283 bases, 1442809345 kmers +read 16300000 sequences, 2470231701 bases, 1459631763 kmers +read 16400000 sequences, 2494280866 bases, 1477480928 kmers +read 16500000 sequences, 2519161479 bases, 1496161541 kmers +read 16600000 sequences, 2545406671 bases, 1516206733 kmers +sorting buffer... 
+saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280753237549641.minimizers.3.bin'... +read 16636523 sequences, 2556368582 bases, 1524904156 kmers +num_kmers 1524904156 +num_super_kmers 105364487 +num_pieces 16636524 (+1.35283 [bits/kmer]) +=== step 1: 'parse_file' 203.245 [sec] (133.284 [ns/kmer]) + == files to merge = 4 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 105364487 +num_minimizers 61977146 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 5.40598 [sec] (3.54513 [ns/kmer]) +bits_per_offset = ceil(log2(2556368645)) = 32 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280962006872549.bucket_pairs.0.bin'... +num_singletons 51924477/61977146 (83.78%) +=== step 3: 'build_index' 13.1009 [sec] (8.59129 [ns/kmer]) +max_num_super_kmers_in_bucket 60556 +log2_max_num_super_kmers_in_bucket 16 +num_buckets_in_skew_index 36822/61977146 (0.0594122%) +num_partitions 7 +computing sizes of partitions... + partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 32197643 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 16219357 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 8093770 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 3624950 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 3277017 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 2847802 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 60556: 8155619 +num_kmers_in_skew_index 74416158 (4.88005%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 32197643 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[0] for 32197643 keys; bits/key = 2.83246 + built positions[0] for 32197643 keys; bits/key = 7.00001 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 16219357 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 16219357 keys; bits/key = 2.91803 + built positions[1] for 16219357 keys; bits/key = 8.00002 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 8093770 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 8093770 keys; bits/key = 3.03812 + built positions[2] for 8093770 keys; bits/key = 9.00004 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 3624950 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 3624950 keys; bits/key = 3.24774 + built positions[3] for 3624950 keys; bits/key = 10.0001 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 3277017 + building MPHF with 64 threads and 256 partitions... + built mphs[4] for 3277017 keys; bits/key = 3.30128 + built positions[4] for 3277017 keys; bits/key = 11.0001 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 2847802 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 2847802 keys; bits/key = 3.37697 + built positions[5] for 2847802 keys; bits/key = 12.0001 + lower 4096; upper 60556; num_bits_per_pos 16; keys_in_partition.size() 8155619 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[6] for 8155619 keys; bits/key = 3.03513 + built positions[6] for 8155619 keys; bits/key = 16 +num_bits_for_skew_index 885023664(0.58038 [bits/kmer]) +=== step 4: 'build_skew_index' 9.1251 [sec] (5.98405 [ns/kmer]) +=== total_time 230.877 [sec] (151.405 [ns/kmer]) +total index size: 1231531808 [B] -- 1231.53 [MB] +SPACE BREAKDOWN: + minimizers: 0.110789 [bits/kmer] (2.72589 [bits/key]) -- 1.71476% + pieces: 0.113881 [bits/kmer] -- 1.76261% + num_super_kmers_before_bucket: 0.0919584 [bits/kmer] -- 1.42331% + offsets: 2.21107 [bits/kmer] -- 34.2223% + strings: 3.35283 [bits/kmer] -- 51.8941% + skew_index: 0.58038 [bits/kmer] -- 8.98296% + weights: 9.65307e-07 [bits/kmer] -- 1.49407e-05% + weight_interval_values: 1.67879e-07 [bits/kmer] + weight_interval_lengths: 6.29548e-07 [bits/kmer] + weight_dictionary: 1.67879e-07 [bits/kmer] + -------------- + total: 6.4609 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 83.78% +buckets with 2 super_kmers = 8.50293% +buckets with 3 super_kmers = 2.72349% +buckets with 4 super_kmers = 1.28045% +buckets with 5 super_kmers = 0.74659% +buckets with 6 super_kmers = 0.490636% +buckets with 7 super_kmers = 0.346841% +buckets with 8 super_kmers = 0.259701% +buckets with 9 super_kmers = 0.201973% +buckets with 10 super_kmers = 0.163723% +buckets with 11 super_kmers = 0.13585% +buckets with 12 super_kmers = 0.116798% +buckets with 13 super_kmers = 0.100644% +buckets with 14 super_kmers = 0.0880276% +buckets with 15 super_kmers = 0.0784838% +buckets with 16 super_kmers = 0.0713408% +max_num_super_kmers_in_bucket 60556 +2025-11-27 23:03:04: saving data structure to disk... +2025-11-27 23:03:04: DONE +k = 63, m = 31, seed = 1, l = 6, c = 3, canonical_parsing = false, weighted = false +reading file '/mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz'... 
+m_buffer_size 29411764 +read 100000 sequences, 12340428 bases, 6140490 kmers +read 200000 sequences, 24646464 bases, 12246526 kmers +read 300000 sequences, 37018030 bases, 18418092 kmers +read 400000 sequences, 49394989 bases, 24595051 kmers +read 500000 sequences, 61758753 bases, 30758815 kmers +read 600000 sequences, 74141065 bases, 36941127 kmers +read 700000 sequences, 86514469 bases, 43114531 kmers +read 800000 sequences, 98874419 bases, 49274481 kmers +read 900000 sequences, 111243104 bases, 55443166 kmers +read 1000000 sequences, 123617917 bases, 61617979 kmers +read 1100000 sequences, 136042481 bases, 67842543 kmers +read 1200000 sequences, 148420567 bases, 74020629 kmers +read 1300000 sequences, 160880986 bases, 80281048 kmers +read 1400000 sequences, 173296738 bases, 86496800 kmers +read 1500000 sequences, 185720015 bases, 92720077 kmers +read 1600000 sequences, 198143788 bases, 98943850 kmers +read 1700000 sequences, 210604030 bases, 105204092 kmers +read 1800000 sequences, 223058949 bases, 111459011 kmers +read 1900000 sequences, 235529393 bases, 117729455 kmers +read 2000000 sequences, 248006332 bases, 124006394 kmers +read 2100000 sequences, 260467827 bases, 130267889 kmers +read 2200000 sequences, 272924289 bases, 136524351 kmers +read 2300000 sequences, 285441228 bases, 142841290 kmers +read 2400000 sequences, 297932568 bases, 149132630 kmers +read 2500000 sequences, 310463939 bases, 155464001 kmers +read 2600000 sequences, 323014521 bases, 161814583 kmers +read 2700000 sequences, 335527696 bases, 168127758 kmers +read 2800000 sequences, 348042722 bases, 174442784 kmers +read 2900000 sequences, 360581821 bases, 180781883 kmers +read 3000000 sequences, 373165681 bases, 187165743 kmers +read 3100000 sequences, 385714108 bases, 193514170 kmers +read 3200000 sequences, 398280992 bases, 199881054 kmers +read 3300000 sequences, 410870862 bases, 206270924 kmers +read 3400000 sequences, 423449691 bases, 212649753 kmers +read 3500000 sequences, 436046745 
bases, 219046807 kmers +read 3600000 sequences, 448679809 bases, 225479871 kmers +read 3700000 sequences, 461321374 bases, 231921436 kmers +read 3800000 sequences, 473931319 bases, 238331381 kmers +read 3900000 sequences, 486569329 bases, 244769391 kmers +read 4000000 sequences, 499216681 bases, 251216743 kmers +read 4100000 sequences, 511895744 bases, 257695806 kmers +read 4200000 sequences, 524602447 bases, 264202509 kmers +read 4300000 sequences, 537274929 bases, 270674991 kmers +read 4400000 sequences, 549984249 bases, 277184311 kmers +read 4500000 sequences, 562699936 bases, 283699998 kmers +read 4600000 sequences, 575434830 bases, 290234892 kmers +read 4700000 sequences, 588209883 bases, 296809945 kmers +read 4800000 sequences, 600967786 bases, 303367848 kmers +read 4900000 sequences, 613729111 bases, 309929173 kmers +read 5000000 sequences, 626504015 bases, 316504077 kmers +read 5100000 sequences, 639246673 bases, 323046735 kmers +read 5200000 sequences, 652025570 bases, 329625632 kmers +read 5300000 sequences, 664852185 bases, 336252247 kmers +read 5400000 sequences, 677687532 bases, 342887594 kmers +read 5500000 sequences, 690503191 bases, 349503253 kmers +read 5600000 sequences, 703329021 bases, 356129083 kmers +read 5700000 sequences, 716216575 bases, 362816637 kmers +read 5800000 sequences, 729051677 bases, 369451739 kmers +read 5900000 sequences, 741931929 bases, 376131991 kmers +read 6000000 sequences, 754844870 bases, 382844932 kmers +read 6100000 sequences, 767765015 bases, 389565077 kmers +read 6200000 sequences, 780677651 bases, 396277713 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.0.bin'... 
+read 6300000 sequences, 793620604 bases, 403020666 kmers +read 6400000 sequences, 806637866 bases, 409837928 kmers +read 6500000 sequences, 819598810 bases, 416598872 kmers +read 6600000 sequences, 832595190 bases, 423395252 kmers +read 6700000 sequences, 845581476 bases, 430181538 kmers +read 6800000 sequences, 858575752 bases, 436975814 kmers +read 6900000 sequences, 871622804 bases, 443822866 kmers +read 7000000 sequences, 884611433 bases, 450611495 kmers +read 7100000 sequences, 897672844 bases, 457472906 kmers +read 7200000 sequences, 910732890 bases, 464332952 kmers +read 7300000 sequences, 923812876 bases, 471212938 kmers +read 7400000 sequences, 936909486 bases, 478109548 kmers +read 7500000 sequences, 949997076 bases, 484997138 kmers +read 7600000 sequences, 963107948 bases, 491908010 kmers +read 7700000 sequences, 976263311 bases, 498863373 kmers +read 7800000 sequences, 989391312 bases, 505791374 kmers +read 7900000 sequences, 1002542416 bases, 512742478 kmers +read 8000000 sequences, 1015719792 bases, 519719854 kmers +read 8100000 sequences, 1028930102 bases, 526730164 kmers +read 8200000 sequences, 1042133340 bases, 533733402 kmers +read 8300000 sequences, 1055343002 bases, 540743064 kmers +read 8400000 sequences, 1068571302 bases, 547771364 kmers +read 8500000 sequences, 1081782071 bases, 554782133 kmers +read 8600000 sequences, 1095081331 bases, 561881393 kmers +read 8700000 sequences, 1108381691 bases, 568981753 kmers +read 8800000 sequences, 1121704459 bases, 576104521 kmers +read 8900000 sequences, 1135025716 bases, 583225778 kmers +read 9000000 sequences, 1148384003 bases, 590384065 kmers +read 9100000 sequences, 1161802419 bases, 597602481 kmers +read 9200000 sequences, 1175228269 bases, 604828331 kmers +read 9300000 sequences, 1188645635 bases, 612045697 kmers +read 9400000 sequences, 1202107172 bases, 619307234 kmers +read 9500000 sequences, 1215616855 bases, 626616917 kmers +read 9600000 sequences, 1229082244 bases, 633882306 kmers +read 
9700000 sequences, 1242623466 bases, 641223528 kmers +read 9800000 sequences, 1256182360 bases, 648582422 kmers +read 9900000 sequences, 1269727590 bases, 655927652 kmers +read 10000000 sequences, 1283305118 bases, 663305180 kmers +read 10100000 sequences, 1296926048 bases, 670726110 kmers +read 10200000 sequences, 1310556982 bases, 678157044 kmers +read 10300000 sequences, 1324233550 bases, 685633612 kmers +read 10400000 sequences, 1337906859 bases, 693106921 kmers +read 10500000 sequences, 1351643168 bases, 700643230 kmers +read 10600000 sequences, 1365330951 bases, 708131013 kmers +read 10700000 sequences, 1379108463 bases, 715708525 kmers +read 10800000 sequences, 1392877680 bases, 723277742 kmers +read 10900000 sequences, 1406644001 bases, 730844063 kmers +read 11000000 sequences, 1420392493 bases, 738392555 kmers +read 11100000 sequences, 1434208255 bases, 746008317 kmers +read 11200000 sequences, 1448044904 bases, 753644966 kmers +read 11300000 sequences, 1461973739 bases, 761373801 kmers +read 11400000 sequences, 1475908384 bases, 769108446 kmers +read 11500000 sequences, 1489828655 bases, 776828717 kmers +read 11600000 sequences, 1503804136 bases, 784604198 kmers +read 11700000 sequences, 1517825110 bases, 792425172 kmers +read 11800000 sequences, 1531854849 bases, 800254911 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.1.bin'... 
+read 11900000 sequences, 1545912444 bases, 808112506 kmers +read 12000000 sequences, 1559904915 bases, 815904977 kmers +read 12100000 sequences, 1574126558 bases, 823926620 kmers +read 12200000 sequences, 1588253922 bases, 831853984 kmers +read 12300000 sequences, 1602371618 bases, 839771680 kmers +read 12400000 sequences, 1616537157 bases, 847737219 kmers +read 12500000 sequences, 1630788602 bases, 855788664 kmers +read 12600000 sequences, 1644978314 bases, 863778376 kmers +read 12700000 sequences, 1659241146 bases, 871841208 kmers +read 12800000 sequences, 1673584118 bases, 879984180 kmers +read 12900000 sequences, 1687940515 bases, 888140577 kmers +read 13000000 sequences, 1702343713 bases, 896343775 kmers +read 13100000 sequences, 1716759504 bases, 904559566 kmers +read 13200000 sequences, 1731228620 bases, 912828682 kmers +read 13300000 sequences, 1745703567 bases, 921103629 kmers +read 13400000 sequences, 1760203118 bases, 929403180 kmers +read 13500000 sequences, 1774792602 bases, 937792664 kmers +read 13600000 sequences, 1789438596 bases, 946238658 kmers +read 13700000 sequences, 1804059588 bases, 954659650 kmers +read 13800000 sequences, 1818743867 bases, 963143929 kmers +read 13900000 sequences, 1833395553 bases, 971595615 kmers +read 14000000 sequences, 1848153717 bases, 980153779 kmers +read 14100000 sequences, 1862998527 bases, 988798589 kmers +read 14200000 sequences, 1877911963 bases, 997512025 kmers +read 14300000 sequences, 1892816751 bases, 1006216813 kmers +read 14400000 sequences, 1907719813 bases, 1014919875 kmers +read 14500000 sequences, 1922766528 bases, 1023766590 kmers +read 14600000 sequences, 1937831250 bases, 1032631312 kmers +read 14700000 sequences, 1952937508 bases, 1041537570 kmers +read 14800000 sequences, 1968048267 bases, 1050448329 kmers +read 14900000 sequences, 1983251368 bases, 1059451430 kmers +read 15000000 sequences, 1998487486 bases, 1068487548 kmers +read 15100000 sequences, 2013788234 bases, 1077588296 kmers +read 
15200000 sequences, 2029113702 bases, 1086713764 kmers +read 15300000 sequences, 2044464630 bases, 1095864692 kmers +read 15400000 sequences, 2059908322 bases, 1105108384 kmers +read 15500000 sequences, 2075402983 bases, 1114403045 kmers +read 15600000 sequences, 2090966187 bases, 1123766249 kmers +read 15700000 sequences, 2106543558 bases, 1133143620 kmers +read 15800000 sequences, 2122217022 bases, 1142617084 kmers +read 15900000 sequences, 2137840083 bases, 1152040145 kmers +read 16000000 sequences, 2153589344 bases, 1161589406 kmers +read 16100000 sequences, 2169260628 bases, 1171060690 kmers +read 16200000 sequences, 2185176930 bases, 1180776992 kmers +read 16300000 sequences, 2201140458 bases, 1190540520 kmers +read 16400000 sequences, 2217102473 bases, 1200302535 kmers +read 16500000 sequences, 2233154507 bases, 1210154569 kmers +read 16600000 sequences, 2249289162 bases, 1220089224 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.2.bin'... 
+read 16700000 sequences, 2265554961 bases, 1230155023 kmers +read 16800000 sequences, 2281740014 bases, 1240140076 kmers +read 16900000 sequences, 2298107838 bases, 1250307900 kmers +read 17000000 sequences, 2314472095 bases, 1260472157 kmers +read 17100000 sequences, 2331031176 bases, 1270831238 kmers +read 17200000 sequences, 2347660266 bases, 1281260328 kmers +read 17300000 sequences, 2364281388 bases, 1291681450 kmers +read 17400000 sequences, 2381150963 bases, 1302351025 kmers +read 17500000 sequences, 2398014863 bases, 1313014925 kmers +read 17600000 sequences, 2414848399 bases, 1323648461 kmers +read 17700000 sequences, 2431827698 bases, 1334427760 kmers +read 17800000 sequences, 2448880349 bases, 1345280411 kmers +read 17900000 sequences, 2466079400 bases, 1356279462 kmers +read 18000000 sequences, 2483330959 bases, 1367331021 kmers +read 18100000 sequences, 2500735710 bases, 1378535772 kmers +read 18200000 sequences, 2518182276 bases, 1389782338 kmers +read 18300000 sequences, 2535776920 bases, 1401176982 kmers +read 18400000 sequences, 2553420194 bases, 1412620256 kmers +read 18500000 sequences, 2571217426 bases, 1424217488 kmers +read 18600000 sequences, 2589036466 bases, 1435836528 kmers +read 18700000 sequences, 2607070465 bases, 1447670527 kmers +read 18800000 sequences, 2625276965 bases, 1459677027 kmers +read 18900000 sequences, 2643374208 bases, 1471574270 kmers +read 19000000 sequences, 2661730118 bases, 1483730180 kmers +read 19100000 sequences, 2680149690 bases, 1495949752 kmers +read 19200000 sequences, 2698694575 bases, 1508294637 kmers +read 19300000 sequences, 2717352858 bases, 1520752920 kmers +read 19400000 sequences, 2736256794 bases, 1533456856 kmers +read 19500000 sequences, 2755257099 bases, 1546257161 kmers +read 19600000 sequences, 2774351092 bases, 1559151154 kmers +read 19700000 sequences, 2793728147 bases, 1572328209 kmers +read 19800000 sequences, 2813220812 bases, 1585620874 kmers +read 19900000 sequences, 2832722318 bases, 
1598922380 kmers +read 20000000 sequences, 2852409685 bases, 1612409747 kmers +read 20100000 sequences, 2872347441 bases, 1626147503 kmers +read 20200000 sequences, 2892389463 bases, 1639989525 kmers +read 20300000 sequences, 2912735330 bases, 1654135392 kmers +read 20400000 sequences, 2933385055 bases, 1668585117 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.3.bin'... +read 20500000 sequences, 2954091410 bases, 1683091472 kmers +read 20600000 sequences, 2974925213 bases, 1697725275 kmers +read 20700000 sequences, 2995915692 bases, 1712515754 kmers +read 20800000 sequences, 3017122624 bases, 1727522686 kmers +read 20900000 sequences, 3038531652 bases, 1742731714 kmers +read 21000000 sequences, 3060194460 bases, 1758194522 kmers +read 21100000 sequences, 3081907028 bases, 1773707090 kmers +read 21200000 sequences, 3104086626 bases, 1789686688 kmers +read 21300000 sequences, 3126473195 bases, 1805873257 kmers +read 21400000 sequences, 3149107906 bases, 1822307968 kmers +read 21500000 sequences, 3171928079 bases, 1838928141 kmers +read 21600000 sequences, 3194957971 bases, 1855758033 kmers +read 21700000 sequences, 3218340993 bases, 1872941055 kmers +read 21800000 sequences, 3241998343 bases, 1890398405 kmers +read 21900000 sequences, 3265867458 bases, 1908067520 kmers +read 22000000 sequences, 3290140116 bases, 1926140178 kmers +read 22100000 sequences, 3314773229 bases, 1944573291 kmers +read 22200000 sequences, 3339683134 bases, 1963283196 kmers +read 22300000 sequences, 3365026133 bases, 1982426195 kmers +read 22400000 sequences, 3390524791 bases, 2001724853 kmers +read 22500000 sequences, 3416602833 bases, 2021602895 kmers +read 22600000 sequences, 3443068690 bases, 2041868752 kmers +read 22700000 sequences, 3469843700 bases, 2062443762 kmers +read 22800000 sequences, 3496984478 bases, 2083384540 kmers +read 22900000 sequences, 3524534052 bases, 2104734114 kmers +read 23000000 
sequences, 3552570866 bases, 2126570928 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.4.bin'... +read 23100000 sequences, 3581192297 bases, 2148992359 kmers +read 23200000 sequences, 3610381722 bases, 2171981784 kmers +read 23300000 sequences, 3639850904 bases, 2195250966 kmers +read 23400000 sequences, 3669893515 bases, 2219093577 kmers +read 23500000 sequences, 3700753180 bases, 2243753242 kmers +read 23600000 sequences, 3732004041 bases, 2268804103 kmers +read 23700000 sequences, 3763988485 bases, 2294588547 kmers +read 23800000 sequences, 3796342436 bases, 2320742498 kmers +read 23900000 sequences, 3829616775 bases, 2347816837 kmers +read 24000000 sequences, 3863888823 bases, 2375888885 kmers +read 24100000 sequences, 3898563459 bases, 2404363521 kmers +read 24200000 sequences, 3934104488 bases, 2433704550 kmers +read 24300000 sequences, 3970663407 bases, 2464063469 kmers +read 24400000 sequences, 4008289022 bases, 2495489084 kmers +read 24500000 sequences, 4046653033 bases, 2527653095 kmers +read 24600000 sequences, 4085473878 bases, 2560273940 kmers +read 24700000 sequences, 4125325829 bases, 2593925891 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.5.bin'... 
+read 24800000 sequences, 4166644355 bases, 2629044417 kmers +read 24900000 sequences, 4209155738 bases, 2665355800 kmers +read 25000000 sequences, 4253357779 bases, 2703357841 kmers +read 25100000 sequences, 4298305920 bases, 2742105982 kmers +read 25200000 sequences, 4345040766 bases, 2782640828 kmers +read 25300000 sequences, 4392930039 bases, 2824330101 kmers +read 25400000 sequences, 4442653000 bases, 2867853062 kmers +read 25500000 sequences, 4494210063 bases, 2913210125 kmers +read 25600000 sequences, 4547341577 bases, 2960141639 kmers +read 25700000 sequences, 4602288626 bases, 3008888688 kmers +read 25800000 sequences, 4659267065 bases, 3059667127 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.6.bin'... +read 25900000 sequences, 4718698709 bases, 3112898771 kmers +read 26000000 sequences, 4780487522 bases, 3168487584 kmers +read 26100000 sequences, 4845830481 bases, 3227630543 kmers +read 26200000 sequences, 4913892030 bases, 3289492092 kmers +read 26300000 sequences, 4985279649 bases, 3354679711 kmers +read 26400000 sequences, 5059257799 bases, 3422457861 kmers +read 26500000 sequences, 5137444349 bases, 3494444411 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.7.bin'... +read 26600000 sequences, 5220217341 bases, 3571017403 kmers +read 26700000 sequences, 5307644757 bases, 3652244819 kmers +read 26800000 sequences, 5400268538 bases, 3738668600 kmers +read 26900000 sequences, 5498820865 bases, 3831020927 kmers +read 27000000 sequences, 5604483878 bases, 3930483940 kmers +read 27100000 sequences, 5717045771 bases, 4036845833 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.8.bin'... 
+read 27200000 sequences, 5833610977 bases, 4147211039 kmers +read 27300000 sequences, 5846284831 bases, 4153684893 kmers +read 27400000 sequences, 5857666136 bases, 4158866198 kmers +read 27500000 sequences, 5869033199 bases, 4164033261 kmers +read 27600000 sequences, 5880411993 bases, 4169212055 kmers +read 27700000 sequences, 5891780011 bases, 4174380073 kmers +read 27800000 sequences, 5903175513 bases, 4179575575 kmers +read 27900000 sequences, 5914554374 bases, 4184754436 kmers +read 28000000 sequences, 5925952810 bases, 4189952872 kmers +read 28100000 sequences, 5937329908 bases, 4195129970 kmers +read 28200000 sequences, 5948710629 bases, 4200310691 kmers +read 28300000 sequences, 5960084042 bases, 4205484104 kmers +read 28400000 sequences, 5971480602 bases, 4210680664 kmers +read 28500000 sequences, 5982848841 bases, 4215848903 kmers +read 28600000 sequences, 5994224296 bases, 4221024358 kmers +read 28700000 sequences, 6005620451 bases, 4226220513 kmers +read 28800000 sequences, 6016990442 bases, 4231390504 kmers +read 28900000 sequences, 6028385648 bases, 4236585710 kmers +read 29000000 sequences, 6039783825 bases, 4241783887 kmers +read 29100000 sequences, 6051178683 bases, 4246978745 kmers +read 29200000 sequences, 6062557847 bases, 4252157909 kmers +read 29300000 sequences, 6073940813 bases, 4257340875 kmers +read 29400000 sequences, 6085331250 bases, 4262531312 kmers +read 29500000 sequences, 6096728346 bases, 4267728408 kmers +read 29600000 sequences, 6108102978 bases, 4272903040 kmers +read 29700000 sequences, 6119481358 bases, 4278081420 kmers +read 29800000 sequences, 6130856114 bases, 4283256176 kmers +read 29900000 sequences, 6142238297 bases, 4288438359 kmers +read 30000000 sequences, 6153634829 bases, 4293634891 kmers +read 30100000 sequences, 6165020049 bases, 4298820111 kmers +read 30200000 sequences, 6176433423 bases, 4304033485 kmers +read 30300000 sequences, 6187857491 bases, 4309257553 kmers +read 30400000 sequences, 6199252194 bases, 
4314452256 kmers +read 30500000 sequences, 6210672548 bases, 4319672610 kmers +read 30600000 sequences, 6222091861 bases, 4324891923 kmers +read 30700000 sequences, 6233490435 bases, 4330090497 kmers +read 30800000 sequences, 6244878831 bases, 4335278893 kmers +read 30900000 sequences, 6256282376 bases, 4340482438 kmers +read 31000000 sequences, 6267683956 bases, 4345684018 kmers +read 31100000 sequences, 6279097813 bases, 4350897875 kmers +read 31200000 sequences, 6290508351 bases, 4356108413 kmers +read 31300000 sequences, 6301930261 bases, 4361330323 kmers +read 31400000 sequences, 6313323207 bases, 4366523269 kmers +read 31500000 sequences, 6324738619 bases, 4371738681 kmers +read 31600000 sequences, 6336131574 bases, 4376931636 kmers +read 31700000 sequences, 6347524494 bases, 4382124556 kmers +read 31800000 sequences, 6358957002 bases, 4387357064 kmers +read 31900000 sequences, 6370374986 bases, 4392575048 kmers +read 32000000 sequences, 6381788111 bases, 4397788173 kmers +read 32100000 sequences, 6393224920 bases, 4403024982 kmers +read 32200000 sequences, 6404645946 bases, 4408246008 kmers +read 32300000 sequences, 6416049834 bases, 4413449896 kmers +read 32400000 sequences, 6427468467 bases, 4418668529 kmers +read 32500000 sequences, 6438906136 bases, 4423906198 kmers +read 32600000 sequences, 6450334707 bases, 4429134769 kmers +read 32700000 sequences, 6461755008 bases, 4434355070 kmers +read 32800000 sequences, 6473195587 bases, 4439595649 kmers +read 32900000 sequences, 6484644167 bases, 4444844229 kmers +read 33000000 sequences, 6496092416 bases, 4450092478 kmers +read 33100000 sequences, 6507511441 bases, 4455311503 kmers +read 33200000 sequences, 6518945085 bases, 4460545147 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.9.bin'... 
+read 33300000 sequences, 6530384425 bases, 4465784487 kmers +read 33400000 sequences, 6541805219 bases, 4471005281 kmers +read 33500000 sequences, 6553237504 bases, 4476237566 kmers +read 33600000 sequences, 6564689101 bases, 4481489163 kmers +read 33700000 sequences, 6576122082 bases, 4486722144 kmers +read 33800000 sequences, 6587572222 bases, 4491972284 kmers +read 33900000 sequences, 6599018743 bases, 4497218805 kmers +read 34000000 sequences, 6610456706 bases, 4502456768 kmers +read 34100000 sequences, 6621897287 bases, 4507697349 kmers +read 34200000 sequences, 6633348561 bases, 4512948623 kmers +read 34300000 sequences, 6644796892 bases, 4518196954 kmers +read 34400000 sequences, 6656241823 bases, 4523441885 kmers +read 34500000 sequences, 6667691043 bases, 4528691105 kmers +read 34600000 sequences, 6679157213 bases, 4533957275 kmers +read 34700000 sequences, 6690642188 bases, 4539242250 kmers +read 34800000 sequences, 6702109473 bases, 4544509535 kmers +read 34900000 sequences, 6713569359 bases, 4549769421 kmers +read 35000000 sequences, 6725025515 bases, 4555025577 kmers +read 35100000 sequences, 6736483861 bases, 4560283923 kmers +read 35200000 sequences, 6747950533 bases, 4565550595 kmers +read 35300000 sequences, 6759386980 bases, 4570787042 kmers +read 35400000 sequences, 6770873467 bases, 4576073529 kmers +read 35500000 sequences, 6782328796 bases, 4581328858 kmers +read 35600000 sequences, 6793801566 bases, 4586601628 kmers +read 35700000 sequences, 6805288962 bases, 4591889024 kmers +read 35800000 sequences, 6816766599 bases, 4597166661 kmers +read 35900000 sequences, 6828221603 bases, 4602421665 kmers +read 36000000 sequences, 6839697263 bases, 4607697325 kmers +read 36100000 sequences, 6851167968 bases, 4612968030 kmers +read 36200000 sequences, 6862661069 bases, 4618261131 kmers +read 36300000 sequences, 6874122160 bases, 4623522222 kmers +read 36400000 sequences, 6885601876 bases, 4628801938 kmers +read 36500000 sequences, 6897090990 bases, 
4634091052 kmers +read 36600000 sequences, 6908577092 bases, 4639377154 kmers +read 36700000 sequences, 6920070395 bases, 4644670457 kmers +read 36800000 sequences, 6931582953 bases, 4649983015 kmers +read 36900000 sequences, 6943074936 bases, 4655274998 kmers +read 37000000 sequences, 6954565924 bases, 4660565986 kmers +read 37100000 sequences, 6966070452 bases, 4665870514 kmers +read 37200000 sequences, 6977560415 bases, 4671160477 kmers +read 37300000 sequences, 6989066925 bases, 4676466987 kmers +read 37400000 sequences, 7000566400 bases, 4681766462 kmers +read 37500000 sequences, 7012064100 bases, 4687064162 kmers +read 37600000 sequences, 7023578971 bases, 4692379033 kmers +read 37700000 sequences, 7035113747 bases, 4697713809 kmers +read 37800000 sequences, 7046611117 bases, 4703011179 kmers +read 37900000 sequences, 7058112871 bases, 4708312933 kmers +read 38000000 sequences, 7069620689 bases, 4713620751 kmers +read 38100000 sequences, 7081124602 bases, 4718924664 kmers +read 38200000 sequences, 7092641574 bases, 4724241636 kmers +read 38300000 sequences, 7104173321 bases, 4729573383 kmers +read 38400000 sequences, 7115693749 bases, 4734893811 kmers +read 38500000 sequences, 7127223394 bases, 4740223456 kmers +read 38600000 sequences, 7138742801 bases, 4745542863 kmers +read 38700000 sequences, 7150287920 bases, 4750887982 kmers +read 38800000 sequences, 7161816661 bases, 4756216723 kmers +read 38900000 sequences, 7173324334 bases, 4761524396 kmers +read 39000000 sequences, 7184856265 bases, 4766856327 kmers +read 39100000 sequences, 7196427309 bases, 4772227371 kmers +read 39200000 sequences, 7207963095 bases, 4777563157 kmers +read 39300000 sequences, 7219512038 bases, 4782912100 kmers +read 39400000 sequences, 7231058926 bases, 4788258988 kmers +read 39500000 sequences, 7242615745 bases, 4793615807 kmers +read 39600000 sequences, 7254151997 bases, 4798952059 kmers +read 39700000 sequences, 7265709248 bases, 4804309310 kmers +read 39800000 sequences, 
7277261549 bases, 4809661611 kmers +read 39900000 sequences, 7288795953 bases, 4814996015 kmers +read 40000000 sequences, 7300352386 bases, 4820352448 kmers +read 40100000 sequences, 7311909203 bases, 4825709265 kmers +read 40200000 sequences, 7323455080 bases, 4831055142 kmers +read 40300000 sequences, 7335021048 bases, 4836421110 kmers +read 40400000 sequences, 7346572779 bases, 4841772841 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.10.bin'... +read 40500000 sequences, 7358141861 bases, 4847141923 kmers +read 40600000 sequences, 7369700966 bases, 4852501028 kmers +read 40700000 sequences, 7381268484 bases, 4857868546 kmers +read 40800000 sequences, 7392842850 bases, 4863242912 kmers +read 40900000 sequences, 7404426052 bases, 4868626114 kmers +read 41000000 sequences, 7415987087 bases, 4873987149 kmers +read 41100000 sequences, 7427574902 bases, 4879374964 kmers +read 41200000 sequences, 7439175505 bases, 4884775567 kmers +read 41300000 sequences, 7450739513 bases, 4890139575 kmers +read 41400000 sequences, 7462313943 bases, 4895514005 kmers +read 41500000 sequences, 7473895566 bases, 4900895628 kmers +read 41600000 sequences, 7485498872 bases, 4906298934 kmers +read 41700000 sequences, 7497097591 bases, 4911697653 kmers +read 41800000 sequences, 7508699276 bases, 4917099338 kmers +read 41900000 sequences, 7520283386 bases, 4922483448 kmers +read 42000000 sequences, 7531875484 bases, 4927875546 kmers +read 42100000 sequences, 7543485183 bases, 4933285245 kmers +read 42200000 sequences, 7555088346 bases, 4938688408 kmers +read 42300000 sequences, 7566689709 bases, 4944089771 kmers +read 42400000 sequences, 7578300740 bases, 4949500802 kmers +read 42500000 sequences, 7589916966 bases, 4954917028 kmers +read 42600000 sequences, 7601521236 bases, 4960321298 kmers +read 42700000 sequences, 7613130580 bases, 4965730642 kmers +read 42800000 sequences, 7624734645 bases, 4971134707 kmers 
+read 42900000 sequences, 7636365632 bases, 4976565694 kmers +read 43000000 sequences, 7647987121 bases, 4981987183 kmers +read 43100000 sequences, 7659607327 bases, 4987407389 kmers +read 43200000 sequences, 7671221477 bases, 4992821539 kmers +read 43300000 sequences, 7682851013 bases, 4998251075 kmers +read 43400000 sequences, 7694478022 bases, 5003678084 kmers +read 43500000 sequences, 7706091680 bases, 5009091742 kmers +read 43600000 sequences, 7717739091 bases, 5014539153 kmers +read 43700000 sequences, 7729379970 bases, 5019980032 kmers +read 43800000 sequences, 7741005738 bases, 5025405800 kmers +read 43900000 sequences, 7752654757 bases, 5030854819 kmers +read 44000000 sequences, 7764325440 bases, 5036325502 kmers +read 44100000 sequences, 7775977254 bases, 5041777316 kmers +read 44200000 sequences, 7787629671 bases, 5047229733 kmers +read 44300000 sequences, 7799282350 bases, 5052682412 kmers +read 44400000 sequences, 7810951187 bases, 5058151249 kmers +read 44500000 sequences, 7822598421 bases, 5063598483 kmers +read 44600000 sequences, 7834244714 bases, 5069044776 kmers +read 44700000 sequences, 7845891240 bases, 5074491302 kmers +read 44800000 sequences, 7857565519 bases, 5079965581 kmers +read 44900000 sequences, 7869252351 bases, 5085452413 kmers +read 45000000 sequences, 7880919098 bases, 5090919160 kmers +read 45100000 sequences, 7892576949 bases, 5096377011 kmers +read 45200000 sequences, 7904241929 bases, 5101841991 kmers +read 45300000 sequences, 7915920658 bases, 5107320720 kmers +read 45400000 sequences, 7927605513 bases, 5112805575 kmers +read 45500000 sequences, 7939292816 bases, 5118292878 kmers +read 45600000 sequences, 7950975991 bases, 5123776053 kmers +read 45700000 sequences, 7962666759 bases, 5129266821 kmers +read 45800000 sequences, 7974341946 bases, 5134742008 kmers +read 45900000 sequences, 7986038612 bases, 5140238674 kmers +read 46000000 sequences, 7997748818 bases, 5145748880 kmers +read 46100000 sequences, 8009477725 bases, 
5151277787 kmers +read 46200000 sequences, 8021166242 bases, 5156766304 kmers +read 46300000 sequences, 8032854170 bases, 5162254232 kmers +read 46400000 sequences, 8044555955 bases, 5167756017 kmers +read 46500000 sequences, 8056240494 bases, 5173240556 kmers +read 46600000 sequences, 8067931653 bases, 5178731715 kmers +read 46700000 sequences, 8079648908 bases, 5184248970 kmers +read 46800000 sequences, 8091384712 bases, 5189784774 kmers +read 46900000 sequences, 8103124079 bases, 5195324141 kmers +read 47000000 sequences, 8114852096 bases, 5200852158 kmers +read 47100000 sequences, 8126577432 bases, 5206377494 kmers +read 47200000 sequences, 8138321107 bases, 5211921169 kmers +read 47300000 sequences, 8150082884 bases, 5217482946 kmers +read 47400000 sequences, 8161823368 bases, 5223023430 kmers +read 47500000 sequences, 8173558332 bases, 5228558394 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.11.bin'... 
+read 47600000 sequences, 8185297180 bases, 5234097242 kmers +read 47700000 sequences, 8197020764 bases, 5239620826 kmers +read 47800000 sequences, 8208783838 bases, 5245183900 kmers +read 47900000 sequences, 8220547786 bases, 5250747848 kmers +read 48000000 sequences, 8232292674 bases, 5256292736 kmers +read 48100000 sequences, 8244054434 bases, 5261854496 kmers +read 48200000 sequences, 8255808686 bases, 5267408748 kmers +read 48300000 sequences, 8267574482 bases, 5272974544 kmers +read 48400000 sequences, 8279340872 bases, 5278540934 kmers +read 48500000 sequences, 8291110333 bases, 5284110395 kmers +read 48600000 sequences, 8302889275 bases, 5289689337 kmers +read 48700000 sequences, 8314648922 bases, 5295248984 kmers +read 48800000 sequences, 8326414162 bases, 5300814224 kmers +read 48900000 sequences, 8338207337 bases, 5306407399 kmers +read 49000000 sequences, 8349993258 bases, 5311993320 kmers +read 49100000 sequences, 8361791349 bases, 5317591411 kmers +read 49200000 sequences, 8373582804 bases, 5323182866 kmers +read 49300000 sequences, 8385394571 bases, 5328794633 kmers +read 49400000 sequences, 8397202995 bases, 5334403057 kmers +read 49500000 sequences, 8409020292 bases, 5340020354 kmers +read 49600000 sequences, 8420828815 bases, 5345628877 kmers +read 49700000 sequences, 8432643281 bases, 5351243343 kmers +read 49800000 sequences, 8444451677 bases, 5356851739 kmers +read 49900000 sequences, 8456260166 bases, 5362460228 kmers +read 50000000 sequences, 8468086036 bases, 5368086098 kmers +read 50100000 sequences, 8479897293 bases, 5373697355 kmers +read 50200000 sequences, 8491729262 bases, 5379329324 kmers +read 50300000 sequences, 8503563407 bases, 5384963469 kmers +read 50400000 sequences, 8515408110 bases, 5390608172 kmers +read 50500000 sequences, 8527226877 bases, 5396226939 kmers +read 50600000 sequences, 8539067669 bases, 5401867731 kmers +read 50700000 sequences, 8550899987 bases, 5407500049 kmers +read 50800000 sequences, 8562770507 bases, 
5413170569 kmers +read 50900000 sequences, 8574595545 bases, 5418795607 kmers +read 51000000 sequences, 8586456412 bases, 5424456474 kmers +read 51100000 sequences, 8598316076 bases, 5430116138 kmers +read 51200000 sequences, 8610172262 bases, 5435772324 kmers +read 51300000 sequences, 8622055720 bases, 5441455782 kmers +read 51400000 sequences, 8633927263 bases, 5447127325 kmers +read 51500000 sequences, 8645823524 bases, 5452823586 kmers +read 51600000 sequences, 8657691355 bases, 5458491417 kmers +read 51700000 sequences, 8669585563 bases, 5464185625 kmers +read 51800000 sequences, 8681474258 bases, 5469874320 kmers +read 51900000 sequences, 8693381907 bases, 5475581969 kmers +read 52000000 sequences, 8705279756 bases, 5481279818 kmers +read 52100000 sequences, 8717209451 bases, 5487009513 kmers +read 52200000 sequences, 8729118869 bases, 5492718931 kmers +read 52300000 sequences, 8741023453 bases, 5498423515 kmers +read 52400000 sequences, 8752969676 bases, 5504169738 kmers +read 52500000 sequences, 8764903676 bases, 5509903738 kmers +read 52600000 sequences, 8776822814 bases, 5515622876 kmers +read 52700000 sequences, 8788751015 bases, 5521351077 kmers +read 52800000 sequences, 8800669725 bases, 5527069787 kmers +read 52900000 sequences, 8812598349 bases, 5532798411 kmers +read 53000000 sequences, 8824571572 bases, 5538571634 kmers +read 53100000 sequences, 8836518883 bases, 5544318945 kmers +read 53200000 sequences, 8848466396 bases, 5550066458 kmers +read 53300000 sequences, 8860413169 bases, 5555813231 kmers +read 53400000 sequences, 8872384224 bases, 5561584286 kmers +read 53500000 sequences, 8884332155 bases, 5567332217 kmers +read 53600000 sequences, 8896284511 bases, 5573084573 kmers +read 53700000 sequences, 8908270586 bases, 5578870648 kmers +read 53800000 sequences, 8920267982 bases, 5584668044 kmers +read 53900000 sequences, 8932245973 bases, 5590446035 kmers +read 54000000 sequences, 8944259742 bases, 5596259804 kmers +read 54100000 sequences, 
8956269308 bases, 5602069370 kmers +read 54200000 sequences, 8968276811 bases, 5607876873 kmers +read 54300000 sequences, 8980258845 bases, 5613658907 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.12.bin'... +read 54400000 sequences, 8992247801 bases, 5619447863 kmers +read 54500000 sequences, 9004254133 bases, 5625254195 kmers +read 54600000 sequences, 9016278273 bases, 5631078335 kmers +read 54700000 sequences, 9028312029 bases, 5636912091 kmers +read 54800000 sequences, 9040331856 bases, 5642731918 kmers +read 54900000 sequences, 9052350704 bases, 5648550766 kmers +read 55000000 sequences, 9064361175 bases, 5654361237 kmers +read 55100000 sequences, 9076390694 bases, 5660190756 kmers +read 55200000 sequences, 9088446082 bases, 5666046144 kmers +read 55300000 sequences, 9100525786 bases, 5671925848 kmers +read 55400000 sequences, 9112587186 bases, 5677787248 kmers +read 55500000 sequences, 9124623681 bases, 5683623743 kmers +read 55600000 sequences, 9136715382 bases, 5689515444 kmers +read 55700000 sequences, 9148800729 bases, 5695400791 kmers +read 55800000 sequences, 9160843647 bases, 5701243709 kmers +read 55900000 sequences, 9172925217 bases, 5707125279 kmers +read 56000000 sequences, 9185024099 bases, 5713024161 kmers +read 56100000 sequences, 9197117041 bases, 5718917103 kmers +read 56200000 sequences, 9209230518 bases, 5724830580 kmers +read 56300000 sequences, 9221344370 bases, 5730744432 kmers +read 56400000 sequences, 9233465281 bases, 5736665343 kmers +read 56500000 sequences, 9245571873 bases, 5742571935 kmers +read 56600000 sequences, 9257641594 bases, 5748441656 kmers +read 56700000 sequences, 9269746121 bases, 5754346183 kmers +read 56800000 sequences, 9281862419 bases, 5760262481 kmers +read 56900000 sequences, 9294025235 bases, 5766225297 kmers +read 57000000 sequences, 9306137861 bases, 5772137923 kmers +read 57100000 sequences, 9318287515 bases, 5778087577 kmers 
+read 57200000 sequences, 9330445537 bases, 5784045599 kmers +read 57300000 sequences, 9342614929 bases, 5790014991 kmers +read 57400000 sequences, 9354790599 bases, 5795990661 kmers +read 57500000 sequences, 9366968184 bases, 5801968246 kmers +read 57600000 sequences, 9379136016 bases, 5807936078 kmers +read 57700000 sequences, 9391324284 bases, 5813924346 kmers +read 57800000 sequences, 9403502226 bases, 5819902288 kmers +read 57900000 sequences, 9415684578 bases, 5825884640 kmers +read 58000000 sequences, 9427875846 bases, 5831875908 kmers +read 58100000 sequences, 9440085310 bases, 5837885372 kmers +read 58200000 sequences, 9452280471 bases, 5843880533 kmers +read 58300000 sequences, 9464471817 bases, 5849871879 kmers +read 58400000 sequences, 9476715907 bases, 5855915969 kmers +read 58500000 sequences, 9488919933 bases, 5861919995 kmers +read 58600000 sequences, 9501146085 bases, 5867946147 kmers +read 58700000 sequences, 9513406483 bases, 5874006545 kmers +read 58800000 sequences, 9525653213 bases, 5880053275 kmers +read 58900000 sequences, 9537897953 bases, 5886098015 kmers +read 59000000 sequences, 9550182056 bases, 5892182118 kmers +read 59100000 sequences, 9562418655 bases, 5898218717 kmers +read 59200000 sequences, 9574664926 bases, 5904264988 kmers +read 59300000 sequences, 9586958590 bases, 5910358652 kmers +read 59400000 sequences, 9599304698 bases, 5916504760 kmers +read 59500000 sequences, 9611572956 bases, 5922573018 kmers +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764280985108358757.minimizers.13.bin'... 
+read 59568965 sequences, 9620061299 bases, 5926785469 kmers +num_kmers 5926785469 +num_super_kmers 405359784 +num_pieces 59568966 (+1.2463 [bits/kmer]) +=== step 1: 'parse_file' 786.287 [sec] (132.667 [ns/kmer]) + == files to merge = 14 +num_written_tuples = 50000000 +num_written_tuples = 100000000 +num_written_tuples = 150000000 +num_written_tuples = 200000000 +num_written_tuples = 250000000 +num_written_tuples = 300000000 +num_written_tuples = 350000000 +num_written_tuples = 400000000 +num_written_tuples = 405359784 +num_minimizers 274081508 +building minimizers MPHF with 64 threads and 256 partitions... +=== step 2: 'build_minimizers' 26.3309 [sec] (4.44269 [ns/kmer]) +bits_per_offset = ceil(log2(9620061362)) = 34 +m_buffer_size 20833333 +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764281798179498846.bucket_pairs.0.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764281798179498846.bucket_pairs.1.bin'... +sorting buffer... +saving to file '/mnt/hd2/pibiri/DNA/eulertigs/tmp_dir/sshash.tmp.run_1764281798179498846.bucket_pairs.2.bin'... +num_singletons 222713378/274081508 (81.2581%) + == files to merge = 3 +num_written_pairs = 50000000 +num_written_pairs = 51368130 +=== step 3: 'build_index' 88.5287 [sec] (14.9371 [ns/kmer]) +max_num_super_kmers_in_bucket 181286 +log2_max_num_super_kmers_in_bucket 18 +num_buckets_in_skew_index 131732/274081508 (0.0480631%) +num_partitions 7 +computing sizes of partitions... 
+ partition_id = 0: num_kmers belonging to buckets of size > 64 and <= 128: 83511239 + partition_id = 1: num_kmers belonging to buckets of size > 128 and <= 256: 75062454 + partition_id = 2: num_kmers belonging to buckets of size > 256 and <= 512: 66750165 + partition_id = 3: num_kmers belonging to buckets of size > 512 and <= 1024: 56409346 + partition_id = 4: num_kmers belonging to buckets of size > 1024 and <= 2048: 47147744 + partition_id = 5: num_kmers belonging to buckets of size > 2048 and <= 4096: 37159719 + partition_id = 6: num_kmers belonging to buckets of size > 4096 and <= 181286: 90565466 +num_kmers_in_skew_index 456606133 (7.70411%) +building partitions... + lower 64; upper 128; num_bits_per_pos 7; keys_in_partition.size() 83511239 + building MPHF with 64 threads and 256 partitions... + built mphs[0] for 83511239 keys; bits/key = 2.68557 + built positions[0] for 83511239 keys; bits/key = 7 + lower 128; upper 256; num_bits_per_pos 8; keys_in_partition.size() 75062454 + building MPHF with 64 threads and 256 partitions... + built mphs[1] for 75062454 keys; bits/key = 2.63462 + built positions[1] for 75062454 keys; bits/key = 8 + lower 256; upper 512; num_bits_per_pos 9; keys_in_partition.size() 66750165 + building MPHF with 64 threads and 256 partitions... + built mphs[2] for 66750165 keys; bits/key = 2.63827 + built positions[2] for 66750165 keys; bits/key = 9 + lower 512; upper 1024; num_bits_per_pos 10; keys_in_partition.size() 56409346 + building MPHF with 64 threads and 256 partitions... + built mphs[3] for 56409346 keys; bits/key = 2.6742 + built positions[3] for 56409346 keys; bits/key = 10 + lower 1024; upper 2048; num_bits_per_pos 11; keys_in_partition.size() 47147744 + building MPHF with 64 threads and 256 partitions... 
+ built mphs[4] for 47147744 keys; bits/key = 2.72151 + built positions[4] for 47147744 keys; bits/key = 11 + lower 2048; upper 4096; num_bits_per_pos 12; keys_in_partition.size() 37159719 + building MPHF with 64 threads and 256 partitions... + built mphs[5] for 37159719 keys; bits/key = 2.78763 + built positions[5] for 37159719 keys; bits/key = 12 + lower 4096; upper 181286; num_bits_per_pos 18; keys_in_partition.size() 90565466 + building MPHF with 64 threads and 256 partitions... + built mphs[6] for 90565466 keys; bits/key = 2.66514 + built positions[6] for 90565466 keys; bits/key = 18 +num_bits_for_skew_index 6166906640(1.04051 [bits/kmer]) +=== step 4: 'build_skew_index' 91.0231 [sec] (15.3579 [ns/kmer]) +=== total_time 992.17 [sec] (167.404 [ns/kmer]) +total index size: 5140629522 [B] -- 5140.63 [MB] +SPACE BREAKDOWN: + minimizers: 0.126222 [bits/kmer] (2.72945 [bits/key]) -- 1.81907% + pieces: 0.105874 [bits/kmer] -- 1.52582% + num_super_kmers_before_bucket: 0.0945179 [bits/kmer] -- 1.36216% + offsets: 2.32541 [bits/kmer] -- 33.513% + strings: 3.2463 [bits/kmer] -- 46.7845% + skew_index: 1.04051 [bits/kmer] -- 14.9955% + weights: 2.48364e-07 [bits/kmer] -- 3.57933e-06% + weight_interval_values: 4.31937e-08 [bits/kmer] + weight_interval_lengths: 1.61977e-07 [bits/kmer] + weight_dictionary: 4.31937e-08 [bits/kmer] + -------------- + total: 6.93884 [bits/kmer] + === bucket statistics (less) === +buckets with 1 super_kmers = 81.2581% +buckets with 2 super_kmers = 13.7215% +buckets with 3 super_kmers = 2.82286% +buckets with 4 super_kmers = 0.817497% +buckets with 5 super_kmers = 0.370932% +buckets with 6 super_kmers = 0.218227% +buckets with 7 super_kmers = 0.143864% +buckets with 8 super_kmers = 0.101593% +buckets with 9 super_kmers = 0.0752645% +buckets with 10 super_kmers = 0.0576569% +buckets with 11 super_kmers = 0.0455755% +buckets with 12 super_kmers = 0.0367424% +buckets with 13 super_kmers = 0.030229% +buckets with 14 super_kmers = 0.0255643% +buckets 
with 15 super_kmers = 0.0214462% +buckets with 16 super_kmers = 0.0185686% +max_num_super_kmers_in_bucket 181286 +2025-11-27 23:19:38: saving data structure to disk... +2025-11-27 23:19:40: DONE diff --git a/benchmarks/results-27-11-25-v3/k63/regular-build.time.log b/benchmarks/results-27-11-25-v3/k63/regular-build.time.log new file mode 100644 index 0000000..4315b46 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/regular-build.time.log @@ -0,0 +1,138 @@ + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/cod.k63.eulertigs.fa.gz -k 63 -m 24 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/cod.k63.sshash" + User time (seconds): 99.27 + System time (seconds): 2.19 + Percent of CPU this job got: 115% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:27.94 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1670584 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 731917 + Voluntary context switches: 534 + Involuntary context switches: 899 + Swaps: 0 + File system inputs: 349872 + File system outputs: 2415768 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/kestrel.k63.eulertigs.fa.gz -k 63 -m 24 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/kestrel.k63.sshash" + User time (seconds): 183.09 + System time (seconds): 4.30 + Percent of CPU this job got: 106% + Elapsed (wall clock) time (h:mm:ss or m:ss): 2:55.80 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 2564996 + 
Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 1145242 + Voluntary context switches: 350 + Involuntary context switches: 1238 + Swaps: 0 + File system inputs: 658000 + File system outputs: 6827400 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/human.k63.eulertigs.fa.gz -k 63 -m 25 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/human.k63.sshash" + User time (seconds): 778.51 + System time (seconds): 10.43 + Percent of CPU this job got: 179% + Elapsed (wall clock) time (h:mm:ss or m:ss): 7:20.63 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 6080120 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 3108786 + Voluntary context switches: 396 + Involuntary context switches: 3918 + Swaps: 0 + File system inputs: 1667248 + File system outputs: 16982200 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/ncbi-virus.k63.eulertigs.fa.gz -k 63 -m 23 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/ncbi-virus.k63.sshash" + User time (seconds): 64.18 + System time (seconds): 1.69 + Percent of CPU this job got: 103% + Elapsed (wall clock) time (h:mm:ss or m:ss): 1:03.62 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 1222564 + Average resident set size (kbytes): 0 + Major (requiring I/O) 
page faults: 0 + Minor (reclaiming a frame) page faults: 515716 + Voluntary context switches: 296 + Involuntary context switches: 426 + Swaps: 0 + File system inputs: 274400 + File system outputs: 1771104 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/se.k63.eulertigs.fa.gz -k 63 -m 31 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/se.k63.sshash" + User time (seconds): 392.76 + System time (seconds): 7.69 + Percent of CPU this job got: 172% + Elapsed (wall clock) time (h:mm:ss or m:ss): 3:51.84 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 4006940 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 2276076 + Voluntary context switches: 520 + Involuntary context switches: 2346 + Swaps: 0 + File system inputs: 1679744 + File system outputs: 11863104 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 + Command being timed: "./sshash build -i /mnt/hd2/pibiri/DNA/eulertigs/hprc.k63.eulertigs.fa.gz -k 63 -m 31 -t 64 --verbose -d /mnt/hd2/pibiri/DNA/eulertigs/tmp_dir -o /mnt/hd2/pibiri/DNA/sshash-v3-indexes/hprc.k63.sshash" + User time (seconds): 2760.73 + System time (seconds): 29.78 + Percent of CPU this job got: 280% + Elapsed (wall clock) time (h:mm:ss or m:ss): 16:35.70 + Average shared text size (kbytes): 0 + Average unshared data size (kbytes): 0 + Average stack size (kbytes): 0 + Average total size (kbytes): 0 + Maximum resident set size (kbytes): 14452508 + Average resident set size (kbytes): 0 + Major (requiring I/O) page faults: 0 + Minor (reclaiming a frame) page faults: 9026810 + Voluntary 
context switches: 565 + Involuntary context switches: 9053 + Swaps: 0 + File system inputs: 5973656 + File system outputs: 49125104 + Socket messages sent: 0 + Socket messages received: 0 + Signals delivered: 0 + Page size (bytes): 4096 + Exit status: 0 diff --git a/benchmarks/results-27-11-25-v3/k63/regular-streaming-queries-high-hit.log b/benchmarks/results-27-11-25-v3/k63/regular-streaming-queries-high-hit.log new file mode 100644 index 0000000..3c13b59 --- /dev/null +++ b/benchmarks/results-27-11-25-v3/k63/regular-streaming-queries-high-hit.log @@ -0,0 +1,48 @@ +2025-11-28 16:06:31: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR12858649.fastq.gz'... +2025-11-28 16:07:06: DONE +==== query report: +num_kmers = 97972416 +num_positive_kmers = 67275966 (68.6683%) +num_searches = 36382603/67275966 (54.0796%) +num_extensions = 30893363/67275966 (45.9204%) +elapsed = 35596.4 millisec / 35.5964 sec / 0.593273 min / 363.331 ns/kmer +2025-11-28 16:07:07: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR11449743_1.fastq.gz'... +2025-11-28 16:09:39: DONE +==== query report: +num_kmers = 461383839 +num_positive_kmers = 293470517 (63.6066%) +num_searches = 156910447/293470517 (53.4672%) +num_extensions = 136560070/293470517 (46.5328%) +elapsed = 151854 millisec / 151.854 sec / 2.5309 min / 329.128 ns/kmer +2025-11-28 16:09:39: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 16:13:20: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 406529529 (85.0803%) +num_searches = 229260476/406529529 (56.3945%) +num_extensions = 177269053/406529529 (43.6055%) +elapsed = 220627 millisec / 220.627 sec / 3.67711 min / 461.737 ns/kmer +2025-11-28 16:13:20: performing queries from file '/mnt/hd2/pibiri/DNA/queries/ncbi-queries.fastq.gz'... 
+2025-11-28 16:13:22: DONE +==== query report: +num_kmers = 10330949 +num_positive_kmers = 10230224 (99.025%) +num_searches = 5660421/10230224 (55.3304%) +num_extensions = 4569803/10230224 (44.6696%) +elapsed = 1903.3 millisec / 1.9033 sec / 0.0317216 min / 184.233 ns/kmer +2025-11-28 16:13:23: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR27871075_1.fastq.gz'... +2025-11-28 16:32:11: DONE +==== query report: +num_kmers = 541466405 +num_positive_kmers = 507202856 (93.6721%) +num_searches = 330216562/507202856 (65.1054%) +num_extensions = 176986294/507202856 (34.8946%) +elapsed = 1.12818e+06 millisec / 1128.18 sec / 18.8029 min / 2083.56 ns/kmer +2025-11-28 16:32:13: performing queries from file '/mnt/hd2/pibiri/DNA/queries/SRR5833294.fastq.gz'... +2025-11-28 16:36:06: DONE +==== query report: +num_kmers = 477818474 +num_positive_kmers = 434532302 (90.9409%) +num_searches = 250405853/434532302 (57.6265%) +num_extensions = 184126449/434532302 (42.3735%) +elapsed = 233571 millisec / 233.571 sec / 3.89284 min / 488.827 ns/kmer diff --git a/external/cityhash/cityhash.cpp b/external/cityhash/cityhash.cpp new file mode 100644 index 0000000..50cafc5 --- /dev/null +++ b/external/cityhash/cityhash.cpp @@ -0,0 +1,445 @@ +// Taken from: https://github.com/aappleby/smhasher/blob/master/src/City.cpp + +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. + +#include "cityhash.hpp" + +#include +#include // for memcpy and memset + +using namespace std; + +static uint64 UNALIGNED_LOAD64(const char* p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char* p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +#ifndef __BIG_ENDIAN__ + +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) + +#else + +#ifdef _MSC_VER +#include +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) +// Mac OS X / Darwin features +#include +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) + +#else +#include +#endif + +#define uint32_in_expected_order(x) (bswap_32(x)) +#define uint64_in_expected_order(x) (bswap_64(x)) + +#endif // __BIG_ENDIAN__ + +#if !defined(LIKELY) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +static uint64 Fetch64(const char* p) { return uint64_in_expected_order(UNALIGNED_LOAD64(p)); } + 
+static uint32 Fetch32(const char* p) { return uint32_in_expected_order(UNALIGNED_LOAD32(p)); } + +// Some primes between 2^63 and 2^64 for various uses. +static const uint64 k0 = 0xc3a5c85c97cb3127ULL; +static const uint64 k1 = 0xb492b66fbe98f273ULL; +static const uint64 k2 = 0x9ae16a3b2f90404fULL; +static const uint64 k3 = 0xc949d7c7509e6557ULL; + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +static uint64 Rotate(uint64 val, int shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +// Equivalent to Rotate(), but requires the second arg to be non-zero. +// On x86-64, and probably others, it's possible for this to compile +// to a single instruction if both args are already in registers. +static uint64 RotateByAtLeast1(uint64 val, int shift) { + return (val >> shift) | (val << (64 - shift)); +} + +static uint64 ShiftMix(uint64 val) { return val ^ (val >> 47); } + +static uint64 HashLen16(uint64 u, uint64 v) { return Hash128to64(uint128(u, v)); } + +static uint64 HashLen0to16(const char* s, size_t len) { + if (len > 8) { + uint64 a = Fetch64(s); + uint64 b = Fetch64(s + len - 8); + return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; + } + if (len >= 4) { + uint64 a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4)); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast(a) + (static_cast(b) << 8); + uint32 z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k3) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. 
+static uint64 HashLen17to32(const char* s, size_t len) { + uint64 a = Fetch64(s) * k1; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 8) * k2; + uint64 d = Fetch64(s + len - 16) * k0; + return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, a + Rotate(b ^ k3, 20) - c + len); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +static pair WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, + uint64 b) { + a += w; + b = Rotate(b + a + z, 21); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +static pair WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +static uint64 HashLen33to64(const char* s, size_t len) { + uint64 z = Fetch64(s + 24); + uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; + uint64 b = Rotate(a + z, 52); + uint64 c = Rotate(a, 37); + a += Fetch64(s + 8); + c += Rotate(a, 7); + a += Fetch64(s + 16); + uint64 vf = a + z; + uint64 vs = b + Rotate(a, 31) + c; + a = Fetch64(s + 16) + Fetch64(s + len - 32); + z = Fetch64(s + len - 8); + b = Rotate(a + z, 52); + c = Rotate(a, 37); + a += Fetch64(s + len - 24); + c += Rotate(a, 7); + a += Fetch64(s + len - 16); + uint64 wf = a + z; + uint64 ws = b + Rotate(a, 31) + c; + uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); + return ShiftMix(r * k0 + vs) * k2; +} + +uint64 CityHash64(const char* s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. 
+ uint64 x = Fetch64(s + len - 40); + uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~static_cast(63); + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} + +uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); +} + +uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + signed long l = len - 16; + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? 
Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { + if (len < 128) { return CityMurmur(s, len, seed); } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); + v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += Rotate(v.first + z, 49) * k0; + z += Rotate(w.first, 37) * k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. 
+ for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = Rotate(x + y, 42) * k0 + v.second; + w.first += Fetch64(s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64(s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. + x = HashLen16(x, v.first); + y = HashLen16(y + z, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); +} + +uint128 CityHash128(const char* s, size_t len) { + if (len >= 16) { + return CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s) ^ k3, Fetch64(s + 8))); + } else if (len >= 8) { + return CityHash128WithSeed(NULL, 0, + uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1)); + } else { + return CityHash128WithSeed(s, len, uint128(k0, k1)); + } +} + +#if defined(__SSE4_2__) && defined(__x86_64__) +#include + +// Requires len >= 240. +static void CityHashCrc256Long(const char* s, size_t len, uint32 seed, uint64* result) { + uint64 a = Fetch64(s + 56) + k0; + uint64 b = Fetch64(s + 96) + k0; + uint64 c = result[0] = HashLen16(b, len); + uint64 d = result[1] = Fetch64(s + 120) * k0 + len; + uint64 e = Fetch64(s + 184) + seed; + uint64 f = seed; + uint64 g = 0; + uint64 h = 0; + uint64 i = 0; + uint64 j = 0; + uint64 t = c + d; + + // 240 bytes of input per iter. 
+ size_t iters = len / 240; + len -= iters * 240; + do { +#define CHUNK(multiplier, z) \ + { \ + uint64 old_a = a; \ + a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \ + b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ + c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ + d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ + e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ + t = old_a; \ + } \ + f = _mm_crc32_u64(f, a); \ + g = _mm_crc32_u64(g, b); \ + h = _mm_crc32_u64(h, c); \ + i = _mm_crc32_u64(i, d); \ + j = _mm_crc32_u64(j, e); \ + s += 40 + + CHUNK(1, 1); + CHUNK(k0, 0); + CHUNK(1, 1); + CHUNK(k0, 0); + CHUNK(1, 1); + CHUNK(k0, 0); + } while (--iters > 0); + + while (len >= 40) { + CHUNK(k0, 0); + len -= 40; + } + if (len > 0) { + s = s + len - 40; + CHUNK(k0, 0); + } + j += i << 32; + a = HashLen16(a, j); + h += g << 32; + b += h; + c = HashLen16(c, f) + i; + d = HashLen16(d, e + result[0]); + j += e; + i += HashLen16(h, t); + e = HashLen16(a, d) + j; + f = HashLen16(b, c) + a; + g = HashLen16(j, i) + c; + result[0] = e + f + g + h; + a = ShiftMix((a + g) * k0) * k0 + b; + result[1] += a + result[0]; + a = ShiftMix(a * k0) * k0 + c; + result[2] = a + result[1]; + a = ShiftMix((a + e) * k0) * k0; + result[3] = a + result[2]; +} + +// Requires len < 240. 
+static void CityHashCrc256Short(const char* s, size_t len, uint64* result) { + char buf[240]; + memcpy(buf, s, len); + memset(buf + len, 0, 240 - len); + CityHashCrc256Long(buf, 240, ~static_cast(len), result); +} + +void CityHashCrc256(const char* s, size_t len, uint64* result) { + if (LIKELY(len >= 240)) { + CityHashCrc256Long(s, len, 0, result); + } else { + CityHashCrc256Short(s, len, result); + } +} + +uint128 CityHashCrc128WithSeed(const char* s, size_t len, uint128 seed) { + if (len <= 900) { + return CityHash128WithSeed(s, len, seed); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + uint64 u = Uint128High64(seed) + result[0]; + uint64 v = Uint128Low64(seed) + result[1]; + return uint128(HashLen16(u, v + result[2]), HashLen16(Rotate(v, 32), u * k0 + result[3])); + } +} + +uint128 CityHashCrc128(const char* s, size_t len) { + if (len <= 900) { + return CityHash128(s, len); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + return uint128(result[2], result[3]); + } +} + +#endif \ No newline at end of file diff --git a/external/cityhash/cityhash.hpp b/external/cityhash/cityhash.hpp new file mode 100644 index 0000000..23fb877 --- /dev/null +++ b/external/cityhash/cityhash.hpp @@ -0,0 +1,115 @@ +// Taken from: https://github.com/aappleby/smhasher/blob/master/src/City.h + +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides a few functions for hashing strings. On x86-64 +// hardware in 2011, CityHash64() is faster than other high-quality +// hash functions, such as Murmur. This is largely due to higher +// instruction-level parallelism. CityHash64() and CityHash128() also perform +// well on hash-quality tests. +// +// CityHash128() is optimized for relatively long strings and returns +// a 128-bit hash. For strings more than about 2000 bytes it can be +// faster than CityHash64(). +// +// Functions in the CityHash family are not suitable for cryptography. +// +// WARNING: This code has not been tested on big-endian platforms! +// It is known to work well on little-endian platforms that have a small penalty +// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. +// +// By the way, for some hash functions, given strings a and b, the hash +// of a+b is easily derived from the hashes of a and b. This property +// doesn't hold for any hash functions in this file. + +#ifndef CITY_HASH_H_ +#define CITY_HASH_H_ + +#include // for size_t. +#include + +// Microsoft Visual Studio may not have stdint.h. 
+#if defined(_MSC_VER) && (_MSC_VER < 1600) +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +#else // defined(_MSC_VER) +#include +#endif // !defined(_MSC_VER) + +typedef uint8_t uint8; +typedef uint32_t uint32; +typedef uint64_t uint64; +typedef std::pair uint128; + +inline uint64 Uint128Low64(const uint128& x) { return x.first; } +inline uint64 Uint128High64(const uint128& x) { return x.second; } + +// Hash function for a byte array. +uint64 CityHash64(const char* buf, size_t len); + +// Hash function for a byte array. For convenience, a 64-bit seed is also +// hashed into the result. +uint64 CityHash64WithSeed(const char* buf, size_t len, uint64 seed); + +// Hash function for a byte array. For convenience, two seeds are also +// hashed into the result. +uint64 CityHash64WithSeeds(const char* buf, size_t len, uint64 seed0, uint64 seed1); + +// Hash function for a byte array. +uint128 CityHash128(const char* s, size_t len); + +// Hash function for a byte array. For convenience, a 128-bit seed is also +// hashed into the result. +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed); + +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +inline uint64 Hash128to64(const uint128& x) { + // Murmur-inspired hashing. + const uint64 kMul = 0x9ddfea08eb382d69ULL; + uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64 b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +// Conditionally include declarations for versions of City that require SSE4.2 +// instructions to be available. +#if defined(__SSE4_2__) && defined(__x86_64__) + +// Hash function for a byte array. +uint128 CityHashCrc128(const char* s, size_t len); + +// Hash function for a byte array. For convenience, a 128-bit seed is also +// hashed into the result. 
+uint128 CityHashCrc128WithSeed(const char* s, size_t len, uint128 seed); + +// Hash function for a byte array. Sets result[0] ... result[3]. +void CityHashCrc256(const char* s, size_t len, uint64* result); + +#endif // __SSE4_2__ + +#endif // CITY_HASH_H_ \ No newline at end of file diff --git a/external/pthash b/external/pthash index cc4c9c9..cdeee6e 160000 --- a/external/pthash +++ b/external/pthash @@ -1 +1 @@ -Subproject commit cc4c9c9c7366f7ebe238b4b6493372e774608cbf +Subproject commit cdeee6ef1cc97d7cdb022e5d2d0013981335ee3e diff --git a/include/buckets.hpp b/include/buckets.hpp deleted file mode 100644 index f00ba4a..0000000 --- a/include/buckets.hpp +++ /dev/null @@ -1,316 +0,0 @@ -#pragma once - -#include "external/pthash/external/bits/include/elias_fano.hpp" - -#include "util.hpp" -#include "kmer_iterator.hpp" - -namespace sshash { - -template -struct buckets // -{ - lookup_result offset_to_id(const uint64_t offset, const uint64_t k) const { - auto p = pieces.locate(offset); - uint64_t contig_id = p.first.pos; - uint64_t contig_begin = p.first.val; - uint64_t contig_end = p.second.val; - - /* The following facts hold. */ - assert(offset >= contig_id * (k - 1)); - assert(contig_begin <= offset); - assert(offset < contig_end); - /****************************/ - - uint64_t absolute_kmer_id = offset - contig_id * (k - 1); - uint64_t relative_kmer_id = offset - contig_begin; - uint64_t contig_length = contig_end - contig_begin; - assert(contig_length >= k); - uint64_t contig_size = contig_length - k + 1; - - lookup_result res; - res.kmer_id = absolute_kmer_id; - res.kmer_id_in_contig = relative_kmer_id; - res.contig_id = contig_id; - res.contig_size = contig_size; - assert(contig_begin == res.contig_begin(k)); - assert(contig_end == res.contig_end(k)); - - return res; - } - - /* Return where the contig begins and ends in strings. 
*/ - std::pair // [begin, end) - contig_offsets(const uint64_t contig_id) const { - uint64_t begin = pieces.access(contig_id); - uint64_t end = pieces.access(contig_id + 1); - assert(end > begin); - return {begin, end}; - } - - kmer_t contig_prefix(const uint64_t contig_id, const uint64_t k) const { - uint64_t contig_begin = pieces.access(contig_id); - return util::read_kmer_at(strings, k - 1, kmer_t::bits_per_char * contig_begin); - } - - kmer_t contig_suffix(const uint64_t contig_id, const uint64_t k) const { - uint64_t contig_end = pieces.access(contig_id + 1); - return util::read_kmer_at(strings, k - 1, - kmer_t::bits_per_char * (contig_end - k + 1)); - } - - std::pair locate_bucket(const uint64_t bucket_id) const { - uint64_t begin = bucket_sizes.access(bucket_id) + bucket_id; - uint64_t end = bucket_sizes.access(bucket_id + 1) + bucket_id + 1; - assert(begin < end); - return {begin, end}; - } - - lookup_result lookup(uint64_t begin, uint64_t end, kmer_t kmer, minimizer_info mini_info, - const uint64_t k, const uint64_t m) const // - { - { /* check minimizer first */ - uint64_t pos_in_seq = offsets.access(begin); - uint64_t read_mmer = uint64_t( - util::read_kmer_at(strings, m, kmer_t::bits_per_char * pos_in_seq)); - if (read_mmer != mini_info.minimizer) { - auto res = lookup_result(); - res.minimizer_found = false; - return res; - } - } - - for (uint64_t i = begin; i != end; ++i) { - auto res = lookup(i, kmer, mini_info, k, m); - if (res.kmer_id != constants::invalid_uint64) { - assert(is_valid(res)); - return res; - } - } - - return lookup_result(); - } - - lookup_result lookup(uint64_t i, kmer_t kmer, minimizer_info mini_info, const uint64_t k, - const uint64_t m) const // - { - (void)m; - uint64_t pos_in_seq = offsets.access(i); - if (pos_in_seq >= mini_info.pos_in_kmer) { - uint64_t offset = pos_in_seq - mini_info.pos_in_kmer; - auto res = offset_to_id(offset, k); - if (offset + k - 1 < res.contig_end(k)) { - auto read_kmer = - util::read_kmer_at(strings, 
k, kmer_t::bits_per_char * offset); - if (read_kmer == kmer) { - assert(is_valid(res)); - return res; - } - } - } - return lookup_result(); - } - - lookup_result lookup_canonical(uint64_t begin, uint64_t end, kmer_t kmer, kmer_t kmer_rc, - minimizer_info mini_info, const uint64_t k, - const uint64_t m) const // - { - { /* check minimizer first */ - uint64_t pos_in_seq = offsets.access(begin); - uint64_t read_mmer = uint64_t( - util::read_kmer_at(strings, m, kmer_t::bits_per_char * pos_in_seq)); - auto tmp = kmer_t(mini_info.minimizer); - tmp.reverse_complement_inplace(m); - uint64_t minimizer_rc = uint64_t(tmp); - if (read_mmer != mini_info.minimizer and read_mmer != minimizer_rc) { - auto res = lookup_result(); - res.minimizer_found = false; - return res; - } - } - - for (uint64_t i = begin; i != end; ++i) { - auto res = lookup_canonical(i, kmer, kmer_rc, mini_info, k, m); - if (res.kmer_id != constants::invalid_uint64) { - assert(is_valid(res)); - return res; - } - } - - return lookup_result(); - } - - lookup_result lookup_canonical(uint64_t i, kmer_t kmer, kmer_t kmer_rc, - minimizer_info mini_info, const uint64_t k, - const uint64_t m) const // - { - const uint64_t pos_in_seq = offsets.access(i); - uint64_t pos_in_kmer = mini_info.pos_in_kmer; - auto res = check_position(pos_in_seq, pos_in_kmer, kmer, kmer_rc, k); - if (res.kmer_id != constants::invalid_uint64) { - assert(is_valid(res)); - return res; - } - pos_in_kmer = k - m - mini_info.pos_in_kmer; - return check_position(pos_in_seq, pos_in_kmer, kmer, kmer_rc, k); - } - - lookup_result check_position(const uint64_t pos_in_seq, const uint64_t pos_in_kmer, // - const kmer_t kmer, const kmer_t kmer_rc, // - const uint64_t k) const // - { - if (pos_in_seq >= pos_in_kmer) { - uint64_t offset = pos_in_seq - pos_in_kmer; - auto res = offset_to_id(offset, k); - if (offset + k - 1 < res.contig_end(k)) { - auto read_kmer = - util::read_kmer_at(strings, k, kmer_t::bits_per_char * offset); - if (read_kmer == kmer) { - 
assert(is_valid(res)); - return res; - } - if (read_kmer == kmer_rc) { - assert(is_valid(res)); - res.kmer_orientation = constants::backward_orientation; - return res; - } - } - } - return lookup_result(); - } - - uint64_t id_to_offset(const uint64_t id, const uint64_t k) const { - constexpr uint64_t linear_scan_threshold = 8; - uint64_t lo = 0; - uint64_t hi = pieces.size() - 1; - assert(pieces.access(0) == 0); - while (lo < hi) { - if (hi - lo <= linear_scan_threshold) { - for (; lo < hi; ++lo) { - uint64_t val = pieces.access(lo) - lo * (k - 1); - if (val > id) break; - } - break; - } - uint64_t mid = lo + (hi - lo) / 2; - uint64_t val = pieces.access(mid); - assert(val >= mid * (k - 1)); - if (id <= val - mid * (k - 1)) { - hi = mid; - } else { - lo = mid + 1; - } - } - if (lo < pieces.size() and pieces.access(lo) - lo * (k - 1) > id) --lo; - return id + lo * (k - 1); - } - - void access(const uint64_t kmer_id, char* string_kmer, const uint64_t k) const { - uint64_t offset = id_to_offset(kmer_id, k); - auto read_kmer = util::read_kmer_at(strings, k, kmer_t::bits_per_char * offset); - util::uint_kmer_to_string(read_kmer, string_kmer, k); - } - - struct iterator { - iterator() {} - - iterator(buckets const* ptr, // - const uint64_t begin_kmer_id, const uint64_t end_kmer_id, // [begin,end) - const uint64_t k) - : m_buckets(ptr) - , m_begin_kmer_id(begin_kmer_id) - , m_end_kmer_id(end_kmer_id) - , m_k(k) - , m_it(ptr->strings, m_k) // - { - m_offset = m_buckets->id_to_offset(m_begin_kmer_id, k); - auto [pos, piece_end] = m_buckets->pieces.next_geq(m_offset); - if (piece_end == m_offset) pos += 1; - m_pieces_it = m_buckets->pieces.get_iterator_at(pos); - next_piece(); - m_ret.second.resize(m_k, 0); - } - - bool has_next() const { return m_begin_kmer_id != m_end_kmer_id; } - - std::pair next() { - if (m_offset == m_next_offset - m_k + 1) { - m_offset = m_next_offset; - next_piece(); - } - m_ret.first = m_begin_kmer_id; - if (m_clear) { - 
util::uint_kmer_to_string(m_it.get(), m_ret.second.data(), m_k); - assert(kmer_t::bits_per_char * m_offset == m_it.position()); - m_it.at(kmer_t::bits_per_char * (m_offset + m_k)); - } else { - memmove(m_ret.second.data(), m_ret.second.data() + 1, m_k - 1); - m_ret.second[m_k - 1] = kmer_t::uint64_to_char(m_it.get_next_char()); - } - m_clear = false; - ++m_begin_kmer_id; - ++m_offset; - return m_ret; - } - - private: - std::pair m_ret; - buckets const* m_buckets; - uint64_t m_begin_kmer_id, m_end_kmer_id; - uint64_t m_k; - uint64_t m_offset; - uint64_t m_next_offset; - kmer_iterator m_it; - bits::elias_fano::iterator m_pieces_it; - bool m_clear; - - void next_piece() { - m_it.at(kmer_t::bits_per_char * m_offset); - m_next_offset = m_pieces_it.value(); - assert(m_next_offset > m_offset); - m_clear = true; - m_pieces_it.next(); - } - }; - - iterator at(const uint64_t begin_kmer_id, const uint64_t end_kmer_id, const uint64_t k) const { - return iterator(this, begin_kmer_id, end_kmer_id, k); - } - - uint64_t num_bits() const { - return 8 * (pieces.num_bytes() + bucket_sizes.num_bytes() + offsets.num_bytes() + - strings.num_bytes()); - } - - template - void visit(Visitor& visitor) const { - visit_impl(visitor, *this); - } - - template - void visit(Visitor& visitor) { - visit_impl(visitor, *this); - } - - bits::elias_fano pieces; - bits::elias_fano bucket_sizes; - bits::compact_vector offsets; - bits::bit_vector strings; - -private: - template - static void visit_impl(Visitor& visitor, T&& t) { - visitor.visit(t.pieces); - visitor.visit(t.bucket_sizes); - visitor.visit(t.offsets); - visitor.visit(t.strings); - } - - bool is_valid(lookup_result res) const { - return res.contig_size != constants::invalid_uint64 and // - res.kmer_id_in_contig < res.contig_size and // - res.contig_id < pieces.size(); // - } -}; - -} // namespace sshash \ No newline at end of file diff --git a/include/buckets_statistics.hpp b/include/buckets_statistics.hpp index e0ba5e8..05e7a3e 100644 --- 
a/include/buckets_statistics.hpp +++ b/include/buckets_statistics.hpp @@ -51,7 +51,7 @@ struct buckets_statistics { uint64_t max_bucket_size() const { return m_max_bucket_size; } void print_full() const { - std::cout << " === bucket statistics (full) === \n"; + std::cout << "=== bucket statistics (full) === \n"; for (uint64_t bucket_size = 1, prev_bucket_size = 0, prev_kmers_in_buckets = 0, kmers_in_buckets = 0; bucket_size != MAX_BUCKET_SIZE + 1; ++bucket_size) { @@ -90,7 +90,7 @@ struct buckets_statistics { } } - std::cout << " === super_kmer statistics === \n"; + std::cout << "=== super_kmer statistics === \n"; uint64_t total_super_kmers = 0; uint64_t total_kmers = 0; for (uint64_t string_size = 1; string_size != MAX_STRING_SIZE + 1; ++string_size) { @@ -116,7 +116,7 @@ struct buckets_statistics { } void print_less() const { - std::cout << " === bucket statistics (less) === \n"; + std::cout << "=== bucket statistics (less) === \n"; for (uint64_t bucket_size = 1; bucket_size != 16 + 1; ++bucket_size) { if (m_bucket_sizes[bucket_size] > 0) { std::cout << "buckets with " << bucket_size << " minimizer positions = " @@ -124,7 +124,7 @@ struct buckets_statistics { << std::endl; } } - std::cout << "max_bucket_size " << m_max_bucket_size << std::endl; + std::cout << "max_bucket_size = " << m_max_bucket_size << std::endl; } void operator+=(buckets_statistics const& rhs) { diff --git a/include/builder/build_skew_index.hpp b/include/builder/build_skew_index.hpp deleted file mode 100644 index a2fefdf..0000000 --- a/include/builder/build_skew_index.hpp +++ /dev/null @@ -1,238 +0,0 @@ -#pragma once - -#include "external/pthash/include/pthash.hpp" - -namespace sshash { - -template -void build_skew_index(skew_index& m_skew_index, // - parse_data& data, // - buckets const& m_buckets, // - build_configuration const& build_config, // - buckets_statistics const& buckets_stats) // -{ - const uint64_t min_log2_size = m_skew_index.min_log2; - const uint64_t max_log2_size = 
m_skew_index.max_log2; - const uint64_t min_size = 1ULL << min_log2_size; - const uint64_t k = build_config.k; - assert(build_config.k > 0 and build_config.m <= build_config.k); - - m_skew_index.log2_max_bucket_size = std::ceil(std::log2(buckets_stats.max_bucket_size())); - - std::cout << "max_bucket_size " << buckets_stats.max_bucket_size() << std::endl; - std::cout << "log2_max_bucket_size " << m_skew_index.log2_max_bucket_size << std::endl; - - mm::file_source input(data.minimizers.get_minimizers_filename(), - mm::advice::sequential); - - uint64_t num_buckets_in_skew_index = 0; - uint64_t num_super_kmers_in_skew_index = 0; - for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); it.has_next(); - it.next()) // - { - auto bucket = it.bucket(); - if (bucket.size() > min_size) { - num_super_kmers_in_skew_index += bucket.num_super_kmers(); - ++num_buckets_in_skew_index; - } - } - std::cout << "num_buckets_in_skew_index " << num_buckets_in_skew_index << "/" - << buckets_stats.num_buckets() << " (" - << (num_buckets_in_skew_index * 100.0) / buckets_stats.num_buckets() << "%)" - << std::endl; - - if (num_buckets_in_skew_index == 0) { - input.close(); - return; - } - - std::vector buckets; - buckets.reserve(num_buckets_in_skew_index); - std::vector tuples; // backed memory - tuples.reserve(num_super_kmers_in_skew_index); - for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); it.has_next(); - it.next()) // - { - auto bucket = it.bucket(); - if (bucket.size() > min_size) { - minimizer_tuple const* begin = tuples.data() + tuples.size(); - std::copy(bucket.begin_ptr(), bucket.end_ptr(), std::back_inserter(tuples)); - minimizer_tuple const* end = tuples.data() + tuples.size(); - buckets.push_back(bucket_type(begin, end)); - } - } - assert(buckets.size() == num_buckets_in_skew_index); - input.close(); - - std::sort(buckets.begin(), buckets.end(), - [](bucket_type const& x, bucket_type const& y) { return x.size() < y.size(); }); 
- - uint64_t num_partitions = max_log2_size - min_log2_size + 1; - if (buckets_stats.max_bucket_size() < (1ULL << max_log2_size)) { - num_partitions = m_skew_index.log2_max_bucket_size - min_log2_size; - } - std::cout << "num_partitions " << num_partitions << std::endl; - - std::vector num_kmers(num_partitions, 0); - m_skew_index.mphfs.resize(num_partitions); - m_skew_index.positions.resize(num_partitions); - - { - std::cout << "computing sizes of partitions..." << std::endl; - - uint64_t partition_id = 0; - uint64_t lower = min_size; - uint64_t upper = 2 * lower; - uint64_t num_kmers_in_skew_index = 0; - for (uint64_t i = 0; i <= buckets.size(); ++i) { - while (i == buckets.size() or buckets[i].size() > upper) { - std::cout << " partition_id = " << partition_id - << ": num_kmers belonging to buckets of size > " << lower - << " and <= " << upper << ": " << num_kmers[partition_id] << std::endl; - num_kmers_in_skew_index += num_kmers[partition_id]; - - if (i == buckets.size()) break; - - lower = upper; - upper = 2 * lower; - partition_id += 1; - if (partition_id == num_partitions - 1) { upper = buckets_stats.max_bucket_size(); } - } - - if (i == buckets.size()) break; - - assert(buckets[i].size() > lower and buckets[i].size() <= upper); - for (auto mt : buckets[i]) { num_kmers[partition_id] += mt.num_kmers_in_super_kmer; } - } - assert(partition_id == num_partitions - 1); - std::cout << "num_kmers_in_skew_index " << num_kmers_in_skew_index << " (" - << (num_kmers_in_skew_index * 100.0) / buckets_stats.num_kmers() << "%)" - << std::endl; - assert(num_kmers_in_skew_index == - std::accumulate(num_kmers.begin(), num_kmers.end(), uint64_t(0))); - } - - { - std::cout << "building partitions..." 
<< std::endl; - - pthash::build_configuration mphf_build_config; - mphf_build_config.lambda = build_config.lambda; - mphf_build_config.alpha = 0.94; - mphf_build_config.seed = util::get_seed_for_hash_function(build_config); - mphf_build_config.verbose = false; - mphf_build_config.num_threads = build_config.num_threads; - mphf_build_config.avg_partition_size = constants::avg_partition_size; - - uint64_t partition_id = 0; - uint64_t lower = min_size; - uint64_t upper = 2 * lower; - uint64_t num_bits_per_pos = min_log2_size + 1; - - /* Temporary storage for kmers and positions within a partition. */ - std::vector kmers; - std::vector positions_in_bucket; - bits::compact_vector::builder cvb_positions; - kmers.reserve(num_kmers[partition_id]); - positions_in_bucket.reserve(num_kmers[partition_id]); - cvb_positions.resize(num_kmers[partition_id], num_bits_per_pos); - - for (uint64_t i = 0; i <= buckets.size(); ++i) { - while (i == buckets.size() or buckets[i].size() > upper) { - std::cout << " lower = " << lower << "; upper = " << upper - << "; num_bits_per_pos = " << num_bits_per_pos - << "; num_kmers_in_partition = " << kmers.size() << std::endl; - assert(num_kmers[partition_id] == kmers.size()); - assert(num_kmers[partition_id] == positions_in_bucket.size()); - - if (num_kmers[partition_id] > 0) // - { - if (build_config.verbose) { - const uint64_t avg_partition_size = - pthash::compute_avg_partition_size(kmers.size(), mphf_build_config); - const uint64_t num_partitions = - pthash::compute_num_partitions(kmers.size(), avg_partition_size); - assert(num_partitions > 0); - std::cout << " building MPHF with " << mphf_build_config.num_threads - << " threads and " << num_partitions - << " partitions (avg. partition size = " << avg_partition_size - << ")..." 
<< std::endl; - } - - auto& mphf = m_skew_index.mphfs[partition_id]; - mphf.build_in_internal_memory(kmers.begin(), kmers.size(), mphf_build_config); - - std::cout << " built mphs[" << partition_id << "] for " << kmers.size() - << " kmers; bits/key = " - << static_cast(mphf.num_bits()) / mphf.num_keys() - << std::endl; - - for (uint64_t i = 0; i != kmers.size(); ++i) { - kmer_t kmer = kmers[i]; - uint64_t pos = mphf(kmer); - uint32_t pos_in_bucket = positions_in_bucket[i]; - cvb_positions.set(pos, pos_in_bucket); - } - auto& positions = m_skew_index.positions[partition_id]; - cvb_positions.build(positions); - - std::cout << " built positions[" << partition_id << "] for " - << positions.size() << " kmers; bits/key = " - << (positions.num_bytes() * 8.0) / positions.size() << std::endl; - } - - if (i == buckets.size()) break; - - lower = upper; - upper = 2 * lower; - num_bits_per_pos += 1; - partition_id += 1; - if (partition_id == num_partitions - 1) { - upper = buckets_stats.max_bucket_size(); - num_bits_per_pos = m_skew_index.log2_max_bucket_size; - } - - kmers.clear(); - positions_in_bucket.clear(); - kmers.reserve(num_kmers[partition_id]); - positions_in_bucket.reserve(num_kmers[partition_id]); - cvb_positions.resize(num_kmers[partition_id], num_bits_per_pos); - } - - if (i == buckets.size()) break; - - assert(buckets[i].size() > lower and buckets[i].size() <= upper); - uint64_t pos_in_bucket = -1; - uint64_t prev_pos_in_seq = constants::invalid_uint64; - for (auto mt : buckets[i]) // - { - if (mt.pos_in_seq != prev_pos_in_seq) { - prev_pos_in_seq = mt.pos_in_seq; - ++pos_in_bucket; - } - assert(mt.pos_in_seq >= mt.pos_in_kmer); - const uint64_t starting_pos_of_super_kmer = mt.pos_in_seq - mt.pos_in_kmer; - kmer_iterator it(m_buckets.strings, k, - kmer_t::bits_per_char * starting_pos_of_super_kmer); - for (uint64_t i = 0; i != mt.num_kmers_in_super_kmer; ++i) { - auto kmer = it.get(); - if (build_config.canonical) { /* take the canonical kmer */ - auto kmer_rc = 
kmer; - kmer_rc.reverse_complement_inplace(k); - kmer = std::min(kmer, kmer_rc); - } - kmers.push_back(kmer); - positions_in_bucket.push_back(pos_in_bucket); - it.next(); - } - assert(pos_in_bucket < (1ULL << cvb_positions.width())); - } - } - assert(partition_id == num_partitions - 1); - } - - std::cout << "num_bits_for_skew_index " << m_skew_index.num_bits() << "(" - << static_cast(m_skew_index.num_bits()) / buckets_stats.num_kmers() - << " [bits/kmer])" << std::endl; -} - -} // namespace sshash \ No newline at end of file diff --git a/include/builder/build_sparse_and_skew_index.cpp b/include/builder/build_sparse_and_skew_index.cpp new file mode 100644 index 0000000..9e9f1ce --- /dev/null +++ b/include/builder/build_sparse_and_skew_index.cpp @@ -0,0 +1,432 @@ +#include "dictionary_builder.hpp" +#include "include/buckets_statistics.hpp" + +namespace sshash { + +template +void dictionary_builder::build_sparse_and_skew_index( + dictionary& d) // +{ + essentials::timer_type timer; + timer.start(); + + const uint64_t num_minimizer_positions = minimizers.num_minimizer_positions(); + const uint64_t num_minimizers = minimizers.num_minimizers(); + const uint64_t min_size = 1ULL << constants::min_l; + const uint64_t num_bits_per_offset = strings_offsets_builder.num_bits_per_offset(); + + if (build_config.verbose) { + std::cout << "num_bits_per_offset = " << num_bits_per_offset << std::endl; + } + + bits::compact_vector::builder control_codewords_builder; + control_codewords_builder.resize(num_minimizers, num_bits_per_offset + 1); + + mm::file_source input(minimizers.get_minimizers_filename(), + mm::advice::sequential); + + buckets_statistics buckets_stats(num_minimizers, num_kmers, num_minimizer_positions); + + uint64_t num_buckets_larger_than_1_not_in_skew_index = 0; + uint64_t num_buckets_in_skew_index = 0; + uint64_t num_super_kmers_in_buckets_larger_than_1 = 0; + uint64_t num_minimizer_positions_of_buckets_larger_than_1 = 0; + uint64_t 
num_minimizer_positions_of_buckets_in_skew_index = 0; + + for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); // + it.has_next(); it.next()) // + { + const uint64_t bucket_id = it.minimizer(); + assert(bucket_id < num_minimizers); + auto bucket = it.bucket(); + const uint64_t bucket_size = bucket.size(); + buckets_stats.add_bucket_size(bucket_size); + + if (bucket_size > 1) { + if (bucket_size <= min_size) { + ++num_buckets_larger_than_1_not_in_skew_index; + num_minimizer_positions_of_buckets_larger_than_1 += bucket_size; + } else { + ++num_buckets_in_skew_index; + num_minimizer_positions_of_buckets_in_skew_index += bucket_size; + } + num_super_kmers_in_buckets_larger_than_1 += bucket.num_super_kmers(); + } + + uint64_t prev_pos_in_seq = constants::invalid_uint64; + for (auto mt : bucket) { + if (bucket_size == 1 and mt.pos_in_seq != prev_pos_in_seq) { + /* + For minimizers occurring once, store a (log(N)+1)-bit + code, as follows: |offset|0|, i.e., the LSB is 0. + */ + uint64_t code = mt.pos_in_seq << 1; // first LS bit encodes status code: 0 + assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); + control_codewords_builder.set(bucket_id, code); + prev_pos_in_seq = mt.pos_in_seq; + } + buckets_stats.add_num_kmers_in_super_kmer(bucket_size, mt.num_kmers_in_super_kmer); + } + } + + assert(buckets_stats.num_buckets() == num_minimizers); + + strings_offsets_builder.build(d.m_spss.strings_offsets); + strings_builder.build(d.m_spss.strings); + + /* step 1. 
build sparse index */ + assert(buckets_stats.num_buckets() == num_minimizers); + + const uint64_t max_bucket_size = buckets_stats.max_bucket_size(); + const uint64_t log2_max_bucket_size = std::ceil(std::log2(max_bucket_size)); + + if (build_config.verbose) { + std::cout << "num_buckets_larger_than_1_not_in_skew_index " + << num_buckets_larger_than_1_not_in_skew_index << "/" + << buckets_stats.num_buckets() << " (" + << (num_buckets_larger_than_1_not_in_skew_index * 100.0) / + buckets_stats.num_buckets() + << "%)" << std::endl; + std::cout << "num_buckets_in_skew_index " << num_buckets_in_skew_index << "/" + << buckets_stats.num_buckets() << " (" + << (num_buckets_in_skew_index * 100.0) / buckets_stats.num_buckets() << "%)" + << std::endl; + std::cout << "max_bucket_size " << max_bucket_size << std::endl; + std::cout << "log2_max_bucket_size " << log2_max_bucket_size << std::endl; + } + + std::vector buckets; + buckets.reserve(num_buckets_larger_than_1_not_in_skew_index + num_buckets_in_skew_index); + std::vector tuples; // backed memory + tuples.reserve(num_super_kmers_in_buckets_larger_than_1); + + for (minimizers_tuples_iterator it(input.data(), input.data() + input.size()); // + it.has_next(); it.next()) // + { + auto bucket = it.bucket(); + if (bucket.size() > 1) { + minimizer_tuple const* begin = tuples.data() + tuples.size(); + std::copy(bucket.begin_ptr(), bucket.end_ptr(), std::back_inserter(tuples)); + minimizer_tuple const* end = tuples.data() + tuples.size(); + buckets.push_back(bucket_type(begin, end)); + } + } + assert(buckets.size() == + num_buckets_larger_than_1_not_in_skew_index + num_buckets_in_skew_index); + + input.close(); + + std::sort(buckets.begin(), buckets.end(), + [](bucket_type const& x, bucket_type const& y) { return x.size() < y.size(); }); + + uint64_t num_partitions = constants::max_l - constants::min_l + 1; + if (max_bucket_size < min_size) { + num_partitions = 0; + } else if (max_bucket_size < (1ULL << constants::max_l)) { + 
num_partitions = log2_max_bucket_size - constants::min_l; + } + assert(num_partitions <= 8); // so that we need 3 bits to encode a partition_id + + if (build_config.verbose) { + std::cout << "num_partitions in skew index " << num_partitions << std::endl; + std::cout << "num_minimizer_positions_of_buckets_larger_than_1 " + << num_minimizer_positions_of_buckets_larger_than_1 << "/" + << num_minimizer_positions << " (" + << (num_minimizer_positions_of_buckets_larger_than_1 * 100.0) / + num_minimizer_positions + << "%)" << std::endl; + std::cout << "num_minimizer_positions_of_buckets_in_skew_index " + << num_minimizer_positions_of_buckets_in_skew_index << "/" + << num_minimizer_positions << " (" + << (num_minimizer_positions_of_buckets_in_skew_index * 100.0) / + num_minimizer_positions + << "%)" << std::endl; + } + + bits::compact_vector::builder mid_load_buckets_builder; + bits::compact_vector::builder heavy_load_buckets_builder; + mid_load_buckets_builder.resize(num_minimizer_positions_of_buckets_larger_than_1, + num_bits_per_offset); + heavy_load_buckets_builder.resize(num_minimizer_positions_of_buckets_in_skew_index, + num_bits_per_offset); + + d.m_ssi.begin_buckets_of_size.resize(min_size + 1, 0); + + { + uint64_t curr_bucket_size = 2; + uint64_t list_id = 0; + uint64_t mid_load_buckets_size = 0; + uint64_t heavy_load_buckets_size = 0; + + uint64_t partition_id = 0; + uint64_t lower = min_size; + uint64_t upper = 2 * lower; + + for (auto bucket : buckets) { + const uint64_t bucket_size = bucket.size(); + assert(bucket_size >= 2); + + if (bucket_size > curr_bucket_size) { + while (bucket_size > curr_bucket_size) ++curr_bucket_size; + if (curr_bucket_size <= min_size) { + d.m_ssi.begin_buckets_of_size[curr_bucket_size] = mid_load_buckets_size; + } else { + while (curr_bucket_size > upper) { + lower = upper; + upper = 2 * lower; + partition_id += 1; + if (partition_id == num_partitions - 1) upper = max_bucket_size; + } + } + list_id = 0; + } + + if (curr_bucket_size 
<= min_size) { + uint64_t prev_pos_in_seq = constants::invalid_uint64; + for (auto mt : bucket) { + if (prev_pos_in_seq == constants::invalid_uint64) { // only once + uint64_t p = (list_id << constants::min_l) | (curr_bucket_size - 2); + uint64_t code = (p << 2) | 1; // first two LS bits encode status code: 01 + assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); + control_codewords_builder.set(mt.minimizer, code); + } + if (mt.pos_in_seq != prev_pos_in_seq) { + mid_load_buckets_builder.push_back(mt.pos_in_seq); + prev_pos_in_seq = mt.pos_in_seq; + mid_load_buckets_size += 1; + } + } + ++list_id; + } else { + uint64_t prev_pos_in_seq = constants::invalid_uint64; + for (auto mt : bucket) { + if (prev_pos_in_seq == constants::invalid_uint64) { // only once + assert(partition_id < 8); + uint64_t p = (heavy_load_buckets_size << 3) | partition_id; + uint64_t code = (p << 2) | 3; // first two LS bits encode status code: 11 + assert(code < (uint64_t(1) << (num_bits_per_offset + 1))); + control_codewords_builder.set(mt.minimizer, code); + } + if (mt.pos_in_seq != prev_pos_in_seq) { + heavy_load_buckets_builder.push_back(mt.pos_in_seq); + prev_pos_in_seq = mt.pos_in_seq; + heavy_load_buckets_size += 1; + } + } + } + } + } + + control_codewords_builder.build(d.m_ssi.codewords.control_codewords); + mid_load_buckets_builder.build(d.m_ssi.mid_load_buckets); + heavy_load_buckets_builder.build(d.m_ssi.ski.heavy_load_buckets); + + timer.stop(); + + build_stats.add("step 7.1 (build sparse index)", uint64_t(timer.elapsed())); + + if (build_config.verbose) { + print_time(uint64_t(timer.elapsed()), "step 7.1 (build sparse index)"); + } + + timer.reset(); + + if (num_buckets_in_skew_index == 0) { + if (build_config.verbose) buckets_stats.print_less(); + return; + } + + /* step 2. 
build skew index */ + timer.start(); + std::vector num_kmers_in_partition(num_partitions, 0); + d.m_ssi.ski.mphfs.resize(num_partitions); + d.m_ssi.ski.positions.resize(num_partitions); + + { + uint64_t partition_id = 0; + uint64_t lower = min_size; + uint64_t upper = 2 * lower; + uint64_t num_kmers_in_skew_index = 0; + + for (uint64_t i = buckets.size() - num_buckets_in_skew_index; i <= buckets.size(); ++i) // + { + auto const& bucket = buckets[i]; + while (i == buckets.size() or bucket.size() > upper) // + { + if (build_config.verbose) { + std::cout << " partition = " << partition_id + << ": num kmers in buckets of size > " << lower << " and <= " << upper + << ": " << num_kmers_in_partition[partition_id] << std::endl; + } + + num_kmers_in_skew_index += num_kmers_in_partition[partition_id]; + + if (i == buckets.size()) break; + + lower = upper; + upper = 2 * lower; + partition_id += 1; + if (partition_id == num_partitions - 1) upper = max_bucket_size; + } + + if (i == buckets.size()) break; + + assert(bucket.size() > lower and bucket.size() <= upper); + for (auto mt : bucket) { + num_kmers_in_partition[partition_id] += mt.num_kmers_in_super_kmer; + } + } + assert(partition_id == num_partitions - 1); + + if (build_config.verbose) { + std::cout << "num kmers in skew index = " << num_kmers_in_skew_index << " (" + << (num_kmers_in_skew_index * 100.0) / buckets_stats.num_kmers() << "%)" + << std::endl; + } + + assert(num_kmers_in_skew_index == std::accumulate(num_kmers_in_partition.begin(), + num_kmers_in_partition.end(), + uint64_t(0))); + } + + { + pthash::build_configuration mphf_build_config; + mphf_build_config.lambda = + build_config.lambda + 2.0; /* Use higher lambda here since we have less keys. 
*/ + mphf_build_config.alpha = 0.94; + mphf_build_config.seed = util::get_seed_for_hash_function(build_config); + mphf_build_config.verbose = false; + mphf_build_config.num_threads = build_config.num_threads; + mphf_build_config.avg_partition_size = constants::avg_partition_size; + + uint64_t partition_id = 0; + uint64_t lower = min_size; + uint64_t upper = 2 * lower; + uint64_t num_bits_per_pos = constants::min_l + 1; + + /* Temporary storage for kmers and positions within a partition. */ + std::vector kmers; + std::vector positions_in_bucket; + bits::compact_vector::builder cvb_positions; + kmers.reserve(num_kmers_in_partition[partition_id]); + positions_in_bucket.reserve(num_kmers_in_partition[partition_id]); + cvb_positions.resize(num_kmers_in_partition[partition_id], num_bits_per_pos); + + for (uint64_t i = buckets.size() - num_buckets_in_skew_index, k = build_config.k; + i <= buckets.size(); ++i) // + { + auto const& bucket = buckets[i]; + while (i == buckets.size() or bucket.size() > upper) // + { + if (build_config.verbose) { + std::cout << " lower = " << lower << "; upper = " << upper + << "; num_bits_per_pos = " << num_bits_per_pos + << "; num_kmers_in_partition = " << kmers.size() << std::endl; + } + assert(num_kmers_in_partition[partition_id] == kmers.size()); + assert(num_kmers_in_partition[partition_id] == positions_in_bucket.size()); + + if (num_kmers_in_partition[partition_id] > 0) // + { + if (build_config.verbose) { + const uint64_t avg_partition_size = + pthash::compute_avg_partition_size(kmers.size(), mphf_build_config); + const uint64_t num_partitions = + pthash::compute_num_partitions(kmers.size(), avg_partition_size); + assert(num_partitions > 0); + std::cout << " building MPHF with " << mphf_build_config.num_threads + << " threads and " << num_partitions + << " partitions (avg. partition size = " << avg_partition_size + << ")..." 
<< std::endl; + } + + auto& mphf = d.m_ssi.ski.mphfs[partition_id]; + mphf.build_in_internal_memory(kmers.begin(), kmers.size(), mphf_build_config); + + if (build_config.verbose) { + std::cout << " built mphs[" << partition_id << "] for " << kmers.size() + << " kmers; bits/key = " + << static_cast(mphf.num_bits()) / mphf.num_keys() + << std::endl; + } + + for (uint64_t i = 0; i != kmers.size(); ++i) { + Kmer kmer = kmers[i]; + uint64_t pos = mphf(kmer); + uint32_t pos_in_bucket = positions_in_bucket[i]; + cvb_positions.set(pos, pos_in_bucket); + } + auto& positions = d.m_ssi.ski.positions[partition_id]; + cvb_positions.build(positions); + + if (build_config.verbose) { + std::cout << " built positions[" << partition_id << "] for " + << positions.size() << " kmers; bits/key = " + << (positions.num_bytes() * 8.0) / positions.size() << std::endl; + } + } + + if (i == buckets.size()) break; + + lower = upper; + upper = 2 * lower; + num_bits_per_pos += 1; + partition_id += 1; + if (partition_id == num_partitions - 1) { + upper = max_bucket_size; + num_bits_per_pos = log2_max_bucket_size; + } + + kmers.clear(); + positions_in_bucket.clear(); + kmers.reserve(num_kmers_in_partition[partition_id]); + positions_in_bucket.reserve(num_kmers_in_partition[partition_id]); + cvb_positions.resize(num_kmers_in_partition[partition_id], num_bits_per_pos); + } + + if (i == buckets.size()) break; + + assert(bucket.size() > lower and bucket.size() <= upper); + uint64_t pos_in_bucket = -1; + uint64_t prev_pos_in_seq = constants::invalid_uint64; + for (auto mt : bucket) // + { + if (mt.pos_in_seq != prev_pos_in_seq) { + prev_pos_in_seq = mt.pos_in_seq; + ++pos_in_bucket; + } + assert(mt.pos_in_seq >= mt.pos_in_kmer); + + mt.pos_in_seq = d.m_spss.strings_offsets.decode(mt.pos_in_seq).absolute_offset; + + const uint64_t starting_pos_of_super_kmer = mt.pos_in_seq - mt.pos_in_kmer; + kmer_iterator it( + d.m_spss.strings, k, Kmer::bits_per_char * starting_pos_of_super_kmer); + for (uint64_t i = 
0; i != mt.num_kmers_in_super_kmer; ++i) { + auto kmer = it.get(); + if (build_config.canonical) { /* take the canonical kmer */ + auto kmer_rc = kmer; + kmer_rc.reverse_complement_inplace(k); + kmer = std::min(kmer, kmer_rc); + } + kmers.push_back(kmer); + positions_in_bucket.push_back(pos_in_bucket); + it.next(); + } + assert(pos_in_bucket < (1ULL << cvb_positions.width())); + } + } + assert(partition_id == num_partitions - 1); + } + + timer.stop(); + + build_stats.add("step 7.2 (build skew index)", uint64_t(timer.elapsed())); + + if (build_config.verbose) { + print_time(uint64_t(timer.elapsed()), "step 7.2 (build skew index)"); + buckets_stats.print_less(); + } +} + +} // namespace sshash \ No newline at end of file diff --git a/include/builder/build_sparse_index.hpp b/include/builder/build_sparse_index.hpp deleted file mode 100644 index dd3530a..0000000 --- a/include/builder/build_sparse_index.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once - -#include "include/buckets_statistics.hpp" - -namespace sshash { - -struct bucket_size_iterator { - using iterator_category = std::forward_iterator_tag; - - bucket_size_iterator(minimizer_tuple const* begin, minimizer_tuple const* end) - : m_val(0) // first returned value is always 0 - , m_it(begin, end) {} - - uint64_t operator*() const { return m_val; } - - void operator++() { - if (!m_it.has_next()) return; - uint64_t size = m_it.bucket().size(); - assert(size > 0); - m_val += size - 1; // directly compute the cumulative sum - m_it.next(); - } - -private: - uint64_t m_val; - minimizers_tuples_iterator m_it; -}; - -template -buckets_statistics build_sparse_index(parse_data& data, buckets& m_buckets, - build_configuration const& /*build_config*/) // -{ - const uint64_t num_kmers = data.num_kmers; - const uint64_t num_minimizer_positions = data.minimizers.num_minimizer_positions(); - const uint64_t num_buckets = data.minimizers.num_minimizers(); - - - std::cout << "bits_per_offset = ceil(log2(" << data.strings.num_bits() 
/ kmer_t::bits_per_char - << ")) = " << std::ceil(std::log2(data.strings.num_bits() / kmer_t::bits_per_char)) - << std::endl; - - std::cout << "reading from '" << data.minimizers.get_minimizers_filename() << "'..." - << std::endl; - mm::file_source input(data.minimizers.get_minimizers_filename(), - mm::advice::sequential); - minimizer_tuple const* begin = input.data(); - minimizer_tuple const* end = input.data() + input.size(); - - essentials::timer_type timer; - timer.start(); - { - bucket_size_iterator iterator(begin, end); - m_buckets.bucket_sizes.encode(iterator, num_buckets + 1, - num_minimizer_positions - num_buckets); - } - timer.stop(); - std::cout << "encoding bucket sizes: " << timer.elapsed() / 1000000 << " [sec]" << std::endl; - - timer.reset(); - - buckets_statistics buckets_stats(num_buckets, num_kmers, num_minimizer_positions); - - timer.start(); - - bits::compact_vector::builder offsets_builder; - offsets_builder.resize(num_minimizer_positions, - std::ceil(std::log2(data.strings.num_bits() / kmer_t::bits_per_char))); - uint64_t prev_minimizer = constants::invalid_uint64, prev_pos_in_seq = constants::invalid_uint64, bucket_size = 0; - for (auto mt : input) { - if (mt.minimizer != prev_minimizer) { - auto [bucket_begin, bucket_end] = m_buckets.locate_bucket(mt.minimizer); - bucket_size = bucket_end - bucket_begin; - buckets_stats.add_bucket_size(bucket_size); - prev_minimizer = mt.minimizer; - prev_pos_in_seq = constants::invalid_uint64; - } - buckets_stats.add_num_kmers_in_super_kmer(bucket_size, mt.num_kmers_in_super_kmer); - if (mt.pos_in_seq != prev_pos_in_seq) { - offsets_builder.push_back(mt.pos_in_seq); - prev_pos_in_seq = mt.pos_in_seq; - } - } - - input.close(); - timer.stop(); - std::cout << "computing minimizers offsets: " << timer.elapsed() / 1000000 << " [sec]" - << std::endl; - - timer.reset(); - - timer.start(); - m_buckets.pieces.encode(data.pieces.begin(), data.pieces.size(), data.pieces.back()); - 
offsets_builder.build(m_buckets.offsets); - m_buckets.strings.swap(data.strings); - timer.stop(); - std::cout << "encoding string boundaries and building offsets: " << timer.elapsed() / 1000000 - << " [sec]" << std::endl; - - return buckets_stats; -} - -} // namespace sshash diff --git a/include/builder/compute_minimizer_tuples.cpp b/include/builder/compute_minimizer_tuples.cpp new file mode 100644 index 0000000..91ec162 --- /dev/null +++ b/include/builder/compute_minimizer_tuples.cpp @@ -0,0 +1,121 @@ +#include "dictionary_builder.hpp" +#include "util.hpp" +#include "include/kmer_iterator.hpp" +#include "include/minimizer_iterator.hpp" + +namespace sshash { + +template +void dictionary_builder::compute_minimizer_tuples() // +{ + const uint64_t num_threads = build_config.num_threads; + const uint64_t num_sequences = strings_offsets_builder.size() - 1; + const uint64_t num_sequences_per_thread = (num_sequences + num_threads - 1) / num_threads; + const uint64_t k = build_config.k; + const uint64_t m = build_config.m; + + std::vector threads; + threads.reserve(num_threads); + + for (uint64_t t = 0; t * num_sequences_per_thread < num_sequences; ++t) // + { + threads.emplace_back([&, t] { + std::vector buffer; + const uint64_t buffer_size = (build_config.ram_limit_in_GiB * essentials::GiB) / + (2 * sizeof(minimizer_tuple) * num_threads); + buffer.reserve(buffer_size); + + auto save = [&](minimizer_info mini_info, + uint64_t num_kmers_in_super_kmer) // + { + assert(num_kmers_in_super_kmer <= k - m + 1 /* max num kmers in super-kmer */); + if (!buffer.empty() and // + buffer.back().minimizer == mini_info.minimizer and // + buffer.back().pos_in_seq == mini_info.pos_in_seq and // + buffer.back().pos_in_kmer == mini_info.pos_in_kmer) // + { + buffer.back().num_kmers_in_super_kmer += num_kmers_in_super_kmer; + return; + } + if (buffer.size() == buffer_size) { + minimizers.sort_and_flush(buffer); + buffer.clear(); + } + buffer.emplace_back(mini_info, num_kmers_in_super_kmer); 
+ }; + + const uint64_t index_begin = t * num_sequences_per_thread; + const uint64_t index_end = + std::min(index_begin + num_sequences_per_thread, num_sequences); + + kmer_iterator kmer_it(strings_builder, k); + hasher_type hasher(build_config.seed); + minimizer_iterator minimizer_it(k, m, hasher); + minimizer_iterator_rc minimizer_it_rc(k, m, hasher); + + for (uint64_t i = index_begin; i < index_end; ++i) // + { + const uint64_t begin = strings_offsets_builder[i]; + const uint64_t end = strings_offsets_builder[i + 1]; + const uint64_t sequence_len = end - begin; + assert(sequence_len >= k); + + minimizer_info prev_mini_info; + assert(prev_mini_info.minimizer == constants::invalid_uint64); + uint64_t num_kmers_in_super_kmer = 0; + + kmer_it.at(Kmer::bits_per_char * begin); + minimizer_it.set_position(begin); + minimizer_it_rc.set_position(begin); + + for (uint64_t j = 0; j != sequence_len - k + 1; ++j) { + auto uint_kmer = kmer_it.get(); + auto mini_info = minimizer_it.next(uint_kmer); + assert(mini_info.pos_in_seq < end - m + 1); + assert(mini_info.pos_in_kmer < k - m + 1); + + if (build_config.canonical) { + auto uint_kmer_rc = uint_kmer; + uint_kmer_rc.reverse_complement_inplace(k); + auto mini_info_rc = minimizer_it_rc.next(uint_kmer_rc); + assert(mini_info_rc.pos_in_seq < end - m + 1); + assert(mini_info_rc.pos_in_kmer < k - m + 1); + if (mini_info_rc.minimizer < mini_info.minimizer) { + mini_info = mini_info_rc; + mini_info.pos_in_kmer = k - m - mini_info.pos_in_kmer; + } + } + + mini_info.pos_in_seq = + strings_offsets_builder.encode(mini_info.pos_in_seq, begin, i); + + if (prev_mini_info.minimizer == constants::invalid_uint64) { + prev_mini_info = mini_info; + } + + if (mini_info.minimizer != prev_mini_info.minimizer or + mini_info.pos_in_seq != prev_mini_info.pos_in_seq) // + { + save(prev_mini_info, num_kmers_in_super_kmer); + prev_mini_info = mini_info; + num_kmers_in_super_kmer = 0; + } + + num_kmers_in_super_kmer += 1; + kmer_it.next(); + } + + 
save(prev_mini_info, num_kmers_in_super_kmer); + } + + /* flush leftover */ + if (!buffer.empty()) minimizers.sort_and_flush(buffer); + }); + } + + for (auto& t : threads) { + if (t.joinable()) t.join(); + } +} + +} // namespace sshash diff --git a/include/builder/dictionary_builder.hpp b/include/builder/dictionary_builder.hpp new file mode 100644 index 0000000..65703c0 --- /dev/null +++ b/include/builder/dictionary_builder.hpp @@ -0,0 +1,175 @@ +#pragma once + +#include "essentials.hpp" +#include "include/dictionary.hpp" +#include "include/offsets.hpp" +#include "include/builder/util.hpp" +#include "include/buckets_statistics.hpp" + +namespace sshash { + +template +struct dictionary_builder // +{ + dictionary_builder(build_configuration const& build_config) + : build_config(build_config), num_kmers(0), minimizers(build_config), total_time_musec(0) {} + + void build(dictionary& d, std::string const& filename) // + { + d.m_k = build_config.k; + d.m_m = build_config.m; + d.m_spss.k = build_config.k; + d.m_spss.m = build_config.m; + d.m_canonical = build_config.canonical; + d.m_hasher.seed(build_config.seed); + + build_stats.add("input_filename", filename.c_str()); + build_stats.add("k", d.m_k); + build_stats.add("m", d.m_m); + build_stats.add("canonical", d.m_canonical ? 
"true" : "false"); + build_stats.add("seed", build_config.seed); + build_stats.add("num_threads", build_config.num_threads); + + total_time_musec = 0; + + do_step("step 1 (encode strings)", [&]() { + encode_strings(filename); + d.m_num_kmers = num_kmers; + assert(strings_offsets_builder.size() >= 2); + d.m_num_strings = strings_offsets_builder.size() - 1; + }); + + if (build_config.weighted) { + do_step("step 1.1 (build weights)", [&]() { weights_builder.build(d.m_weights); }); + } + + do_step("step 2 (compute minimizer tuples)", [&]() { compute_minimizer_tuples(); }); + + do_step("step 3 (merging minimizer tuples)", [&]() { minimizers.merge(); }); + if (build_config.verbose) { + std::cout << "num_minimizers = " << minimizers.num_minimizers() << std::endl; + std::cout << "num_minimizer_positions = " << minimizers.num_minimizer_positions() + << std::endl; + std::cout << "num_super_kmers = " << minimizers.num_super_kmers() << std::endl; + } + + do_step("step 4 (build mphf)", [&]() { build_mphf(d); }); + + do_step("step 5 (replacing minimizer values with MPHF hashes)", + [&]() { hash_minimizers(d); }); + + do_step("step 6 (merging minimizers tuples)", [&]() { minimizers.merge(); }); + + do_step("step 7 (build sparse and skew index)", [&]() { + build_sparse_and_skew_index(d); + minimizers.remove_tmp_file(); + assert(strings_offsets_builder.size() == 0); + }); + + if (build_config.verbose) { + print_time(total_time_musec, "total time"); + d.print_space_breakdown(); + } + + build_stats.add("total_build_time_in_microsec", total_time_musec); + build_stats.add("index_size_in_bytes", (d.num_bits() + 7) / 8); + build_stats.add("num_kmers", d.num_kmers()); + + build_stats.print(); + } + + build_configuration build_config; + uint64_t num_kmers; + minimizers_tuples minimizers; + typename Offsets::builder strings_offsets_builder; + bits::bit_vector::builder strings_builder; + weights::builder weights_builder; + + essentials::timer_type timer; + essentials::json_lines build_stats; 
+ uint64_t total_time_musec; + +private: + void print_time(double time_in_musec, std::string const& message) { + std::cout << "=== " << message << ": " << time_in_musec / 1'000'000 << " [sec] (" + << (time_in_musec * 1000) / num_kmers << " [ns/kmer])" << std::endl; + } + + template + void do_step(std::string const& step, Callback const& f) { + timer.start(); + f(); + timer.stop(); + uint64_t step_elapsed_time_musec = timer.elapsed(); + total_time_musec += step_elapsed_time_musec; + if (build_config.verbose) print_time(step_elapsed_time_musec, step); + build_stats.add(step, step_elapsed_time_musec); + timer.reset(); + } + + void encode_strings(std::string const& filename); + void encode_strings(std::istream& is, const input_file_t fmt); + void compute_minimizer_tuples(); + void build_sparse_and_skew_index(dictionary& d); + + void build_mphf(dictionary& d) { + const uint64_t num_minimizers = minimizers.num_minimizers(); + mm::file_source input(minimizers.get_minimizers_filename(), + mm::advice::sequential); + minimizers_tuples_iterator iterator(input.data(), input.data() + input.size()); + d.m_ssi.codewords.build(iterator, num_minimizers, build_config); + input.close(); + assert(d.m_ssi.codewords.size() == num_minimizers); + } + + void hash_minimizers(dictionary& d) { + std::string filename = minimizers.get_minimizers_filename(); + std::ifstream input(filename, std::ifstream::binary); + + auto const& f = d.m_ssi.codewords.mphf; + const uint64_t num_threads = build_config.num_threads; + const uint64_t num_files_to_merge = minimizers.num_files_to_merge(); + + minimizers.init(); + + const uint64_t num_super_kmers = minimizers.num_super_kmers(); + const uint64_t buffer_size = num_files_to_merge == 1 + ? 
num_super_kmers + : ((build_config.ram_limit_in_GiB * essentials::GiB) / + (2 * sizeof(minimizer_tuple))); + const uint64_t num_blocks = (num_super_kmers + buffer_size - 1) / buffer_size; + assert(num_super_kmers > (num_blocks - 1) * buffer_size); + + std::vector threads; + threads.reserve(num_threads); + + std::vector buffer; + for (uint64_t i = 0; i != num_blocks; ++i) { + const uint64_t n = (i == num_blocks - 1) + ? num_super_kmers - (num_blocks - 1) * buffer_size + : buffer_size; + buffer.resize(n); + input.read(reinterpret_cast(buffer.data()), + buffer.size() * sizeof(minimizer_tuple)); + const uint64_t chunk_size = (n + num_threads - 1) / num_threads; + for (uint64_t t = 0; t * chunk_size < n; ++t) { + uint64_t begin = t * chunk_size; + uint64_t end = std::min(n, begin + chunk_size); + threads.emplace_back([begin, end, &buffer, &f]() { + for (uint64_t i = begin; i < end; ++i) { + buffer[i].minimizer = f(buffer[i].minimizer); + } + }); + } + for (auto& t : threads) { + if (t.joinable()) t.join(); + } + threads.clear(); + minimizers.sort_and_flush(buffer); + } + + input.close(); + } +}; + +} // namespace sshash \ No newline at end of file diff --git a/include/builder/file_merging_iterator.hpp b/include/builder/file_merging_iterator.hpp index 9336d7a..6db0b36 100644 --- a/include/builder/file_merging_iterator.hpp +++ b/include/builder/file_merging_iterator.hpp @@ -1,40 +1,54 @@ #pragma once -#include - namespace sshash { template struct file_merging_iterator // { + const uint64_t scan_threshold = 16; + template file_merging_iterator(FileNamesIterator file_names_iterator, uint64_t num_files_to_merge) - : m_mm_files(num_files_to_merge) { + : m_mm_files(num_files_to_merge) // + { if (num_files_to_merge == 0) return; - assert(num_files_to_merge > 0); + /* open files and create the input iterators */ m_iterators.reserve(num_files_to_merge); - m_idx_heap.reserve(num_files_to_merge); - - /* create the input iterators and make the heap */ for (uint64_t i = 0; i != 
num_files_to_merge; ++i, ++file_names_iterator) { m_mm_files[i].open(*file_names_iterator, mm::advice::sequential); m_iterators.push_back( {m_mm_files[i].data(), m_mm_files[i].data() + m_mm_files[i].size()}); - m_idx_heap.push_back(i); } - std::make_heap(m_idx_heap.begin(), m_idx_heap.end(), heap_idx_comparator); + + m_num_files_to_merge = num_files_to_merge; + m_min_idx = 0; + if (m_iterators.size() <= scan_threshold) { + compute_min(); + } else { + /* build a looser tree */ + uint64_t n = num_files_to_merge; + uint64_t m = 2 * n - 1; + m_size = n; + m_tree.resize(m); + m_begin = (1ULL << uint64_t(std::ceil(std::log2(n)))) - 1; + uint64_t i = 0; + for (; m_begin + i != m; ++i) m_tree[m_begin + i] = i; + for (uint64_t j = 0; i != n; ++i, ++j) m_tree[n - 1 + j] = i; + build(0); + m_min_idx = m_tree[0]; + } } - bool has_next() { return !m_idx_heap.empty(); } - void next() { advance_heap_head(); } - T operator*() const { return *(m_iterators[m_idx_heap.front()].begin); } + bool has_next() { return m_num_files_to_merge != 0; } + void next() { update(); } + T operator*() const { return *(m_iterators[m_min_idx].begin); } void close() { - for (uint64_t i = 0; i != m_mm_files.size(); ++i) m_mm_files[i].close(); + for (auto& mm_file : m_mm_files) mm_file.close(); m_iterators.clear(); - m_idx_heap.clear(); m_mm_files.clear(); + m_tree.clear(); } private: @@ -43,34 +57,83 @@ struct file_merging_iterator // T const* end; }; std::vector m_iterators; - std::vector m_idx_heap; std::vector> m_mm_files; + std::vector m_tree; - std::function heap_idx_comparator = [&](uint32_t i, uint32_t j) { - assert(i < m_iterators.size() and j < m_iterators.size()); - assert(m_iterators[i].begin != m_iterators[i].end and - m_iterators[j].begin != m_iterators[j].end); - return *(m_iterators[i].begin) > *(m_iterators[j].begin); - }; + uint64_t m_begin, m_size; + uint64_t m_min_idx, m_num_files_to_merge; - void advance_heap_head() { - uint32_t idx = m_idx_heap.front(); - m_iterators[idx].begin += 1; 
- if (m_iterators[idx].begin != m_iterators[idx].end) { // if iterator has next - uint64_t pos = 0; - uint64_t size = m_idx_heap.size(); - while (2 * pos + 1 < size) { - uint64_t i = 2 * pos + 1; - if (i + 1 < size and heap_idx_comparator(m_idx_heap[i], m_idx_heap[i + 1])) ++i; - if (heap_idx_comparator(m_idx_heap[i], m_idx_heap[pos])) break; - std::swap(m_idx_heap[pos], m_idx_heap[i]); - pos = i; + void update() { + if (m_iterators.size() <= scan_threshold) { // compute min with a linear scan + auto& it = m_iterators[m_min_idx]; + it.begin += 1; + if (it.begin == it.end) { + m_iterators.erase(m_iterators.begin() + m_min_idx); + m_min_idx = 0; + --m_num_files_to_merge; + if (m_num_files_to_merge == 0) return; } - } else { - std::pop_heap(m_idx_heap.begin(), m_idx_heap.end(), heap_idx_comparator); - m_idx_heap.pop_back(); + compute_min(); + } else { // update the looser tree + m_min_idx = m_tree[0]; + assert(m_min_idx < m_iterators.size()); + auto& it = m_iterators[m_min_idx]; + it.begin += 1; + uint64_t p = m_begin + m_min_idx; + p -= (p >= m_tree.size()) * m_size; // p is the index of the leaf + if (it.begin == it.end) { + m_tree[p] = uint32_t(-1); + --m_num_files_to_merge; + } + while (p) { + uint64_t is_right_child = (p & 1) == 0; + uint32_t i = 0; + uint32_t l = m_tree[p - is_right_child]; + uint32_t r = m_tree[p + 1 - is_right_child]; + if (l == uint32_t(-1)) { + i = r; + } else if (r == uint32_t(-1)) { + i = l; + } else { + i = *(m_iterators[l].begin) < *(m_iterators[r].begin) ? l : r; + } + uint64_t parent = (p - 1) / 2; + m_tree[parent] = i; + p = parent; + } + m_min_idx = m_tree[0]; } }; + + uint32_t build(uint32_t p) { + if (p >= m_tree.size()) return uint32_t(-1); + if (p >= m_size - 1) return m_tree[p]; // leaf + uint32_t l = build(2 * p + 1); + uint32_t r = build(2 * p + 2); + uint32_t i = 0; + if (l == uint32_t(-1)) { + i = r; + } else if (r == uint32_t(-1)) { + i = l; + } else { + i = *(m_iterators[l].begin) < *(m_iterators[r].begin) ? 
l : r; + } + m_tree[p] = i; + return i; + } + + void compute_min() { + m_min_idx = 0; + auto min_val = *m_iterators.front().begin; + for (uint64_t i = 1; i != m_iterators.size(); ++i) { + assert(m_iterators[i].begin != m_iterators[i].end); + auto val = *m_iterators[i].begin; + if (val < min_val) { + min_val = val; + m_min_idx = i; + } + } + } }; } // namespace sshash \ No newline at end of file diff --git a/include/builder/parallel_sort.hpp b/include/builder/parallel_sort.hpp index 9b6c63c..8605373 100644 --- a/include/builder/parallel_sort.hpp +++ b/include/builder/parallel_sort.hpp @@ -97,7 +97,6 @@ void parallel_sort(std::vector& data, uint64_t num_threads, Compare comp) // next_ranges.clear(); for (uint64_t i = 0; i < ranges.size(); i += 2) { auto [begin1, end1] = ranges[i]; - auto input = data.begin(); auto output = temp_data.begin(); if (swap) std::swap(input, output); @@ -108,7 +107,6 @@ void parallel_sort(std::vector& data, uint64_t num_threads, Compare comp) // if (i + 1 < ranges.size()) { auto [begin2, end2] = ranges[i + 1]; output_size += end2 - begin2; - parallel_merge(begin1, end1, begin2, end2, output_iterator, comp, sequential_merge_threshold); assert(std::is_sorted(output_iterator, output_iterator + output_size, comp)); diff --git a/include/builder/parse_file.cpp b/include/builder/parse_file.cpp new file mode 100644 index 0000000..21e5e07 --- /dev/null +++ b/include/builder/parse_file.cpp @@ -0,0 +1,264 @@ +#include "dictionary_builder.hpp" +#include "util.hpp" +#include "external/gz/zip_stream.hpp" + +#if defined(__AVX2__) +#include +#include +#endif + +namespace sshash { + +namespace util { + +#if defined(__AVX2__) +/* + This function takes 32 bytes and packs the two bits + in positions 1 and 2 (from right) of each byte into + a single 64-bit word. + + This works with the map: + A -> 00; C -> 01; G -> 11; T -> 10. 
+*/ +inline uint64_t pack2bits_shift1(__m256i v) { + // shift >> 1, then mask by 3 to isolate the relevant bits + __m256i shifted = _mm256_srli_epi16(v, 1); + __m256i values = _mm256_and_si256(shifted, _mm256_set1_epi8(3)); + + // collect bit-0 plane + __m256i bit0 = _mm256_slli_epi16(values, 7); + uint32_t mask0 = _mm256_movemask_epi8(bit0); + + // collect bit-1 plane + __m256i bit1 = _mm256_slli_epi16(values, 6); + uint32_t mask1 = _mm256_movemask_epi8(bit1); + + // interleave into the 64-bit result + uint64_t even = _pdep_u64(mask0, 0x5555555555555555ULL); // 010101... + uint64_t odd = _pdep_u64(mask1, 0xAAAAAAAAAAAAAAAAULL); // 101010... + return even | odd; +} +#endif + +} // namespace util + +template +void dictionary_builder::encode_strings(std::istream& is, + const input_file_t fmt) // +{ + const uint64_t k = build_config.k; + const uint64_t m = build_config.m; + assert(k > 0 and k >= m); + const uint64_t max_num_kmers_in_super_kmer = k - m + 1; + + if (max_num_kmers_in_super_kmer >= (1ULL << (sizeof(num_kmers_in_super_kmer_uint_type) * 8))) { + throw std::runtime_error( + "max_num_kmers_in_super_kmer " + std::to_string(max_num_kmers_in_super_kmer) + + " does not fit into " + std::to_string(sizeof(num_kmers_in_super_kmer_uint_type) * 8) + + " bits"); + } + + { + const uint64_t num_bits_for_strings = 8 * 8 * essentials::GB; // reserve 8 GB of memory + const uint64_t num_sequences = 100'000'000; + strings_builder.reserve(num_bits_for_strings); + strings_offsets_builder.reserve(num_sequences); + } + + std::string sequence; + uint64_t num_bases = 0; + uint64_t max_len = 0; + uint64_t seq_len = 0; + weights_builder.init(); + + /* intervals of weights */ + uint64_t weight_value = constants::invalid_uint64; + uint64_t weight_length = 0; + + while (true) // + { + if (fmt == input_file_t::cf_seg) { + std::getline(is, sequence, '\t'); // skip until '\t' and consume it + } else { + assert(fmt == input_file_t::fasta); + if (build_config.weighted) { // parse header + 
std::getline(is, sequence); // header sequence + if (sequence.empty()) break; + + /* + Heder format: + >[id] LN:i:[seq_len] ab:Z:[weight_seq] + where [weight_seq] is a space-separated sequence of integer counters + (the weights), whose length is equal to [seq_len]-k+1. + Example: '>12 LN:i:41 ab:Z:2 2 2 2 2 2 2 2 2 2 2' + */ + + expect(sequence[0], '>'); + uint64_t i = 0; + i = sequence.find_first_of(' ', i); + if (i == std::string::npos) throw parse_runtime_error(); + + i += 1; + expect(sequence[i + 0], 'L'); + expect(sequence[i + 1], 'N'); + expect(sequence[i + 2], ':'); + expect(sequence[i + 3], 'i'); + expect(sequence[i + 4], ':'); + i += 5; + uint64_t j = sequence.find_first_of(' ', i); + if (j == std::string::npos) throw parse_runtime_error(); + + seq_len = std::strtoull(sequence.data() + i, nullptr, 10); + i = j + 1; + expect(sequence[i + 0], 'a'); + expect(sequence[i + 1], 'b'); + expect(sequence[i + 2], ':'); + expect(sequence[i + 3], 'Z'); + expect(sequence[i + 4], ':'); + i += 5; + + for (uint64_t j = 0; j != seq_len - k + 1; ++j) { + uint64_t weight = std::strtoull(sequence.data() + i, nullptr, 10); + i = sequence.find_first_of(' ', i) + 1; + weights_builder.eat(weight); + if (weight == weight_value) { + weight_length += 1; + } else { + if (weight_value != constants::invalid_uint64) { + weights_builder.push_weight_interval(weight_value, weight_length); + } + weight_value = weight; + weight_length = 1; + } + } + } else { + // skip header sequence + is.ignore(std::numeric_limits::max(), '\n'); + } + } + + std::getline(is, sequence); // DNA sequence + + if (is.eof()) break; + + const uint64_t n = sequence.length(); + assert(n >= k); + max_len = n > max_len ? 
n : max_len; + assert(strings_builder.num_bits() % Kmer::bits_per_char == 0); + strings_offsets_builder.push_back(strings_builder.num_bits() / Kmer::bits_per_char); + num_kmers += n - k + 1; + num_bases += n; + + if (build_config.weighted and seq_len != n) { + std::cout << "ERROR: expected a sequence of length " << seq_len + << " but got one of length " << n << std::endl; + throw std::runtime_error("file is malformed"); + } + + if (build_config.verbose and strings_offsets_builder.size() % 1'000'000 == 0) { + std::cout << "read " << strings_offsets_builder.size() << " sequences, " << num_bases + << " bases, " << num_kmers << " kmers" << std::endl; + } + + uint64_t i = 0; + if constexpr (Kmer::bits_per_char == 2) { +#if !defined(SSHASH_USE_TRADITIONAL_NUCLEOTIDE_ENCODING) and defined(__AVX2__) + + /* process 32 bytes at a time */ + for (; i + 32 <= n; i += 32) { + __m256i v = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(&sequence[i])); + uint64_t word = util::pack2bits_shift1(v); + strings_builder.append_bits(word, 64); + } +#endif + } + for (; i < n; ++i) { + strings_builder.append_bits(Kmer::char_to_uint(sequence[i]), Kmer::bits_per_char); + } + } + + strings_offsets_builder.push_back(strings_builder.num_bits() / Kmer::bits_per_char); + assert(strings_offsets_builder.front() == 0); + assert(strings_offsets_builder.size() >= 2); + + /* Push a final sentinel (dummy) value to avoid bounds' checking in + kmer_iterator::fill_buff(). 
*/ + static_assert(Kmer::uint_kmer_bits % 64 == 0); + for (int dummy_bits = Kmer::uint_kmer_bits; dummy_bits; dummy_bits -= 64) { + strings_builder.append_bits(0, 64); + } + + const uint64_t num_sequences = strings_offsets_builder.size() - 1; + + if (build_config.verbose) { + std::cout << "read " << num_sequences << " sequences, " << num_bases << " bases, " + << num_kmers << " kmers" << std::endl; + std::cout << "num_kmers " << num_kmers << std::endl; + std::cout << "cost: 2.0 + " + << static_cast(Kmer::bits_per_char * num_sequences * (k - 1)) / num_kmers + << " [bits/kmer]" << std::endl; + } + + /* + The parameter m (minimizer length) should be at least + ceil(log_s(N))+1 + where N is the number of nucleotides in the input and s is the alphabet size. + We warn the user if the used m is less than this lower bound. + */ + const uint64_t s = uint64_t(1) << Kmer::bits_per_char; + const uint64_t lower_bound_on_m = std::ceil(std::log(num_bases) / std::log(s)) + 1; + if (build_config.verbose and m < lower_bound_on_m) { + std::cout << "\n--> WARNING: using minimizer length " << m + << " but it should be at least ceil(log_" << s << "(" << num_bases + << "))+1 = " << lower_bound_on_m << '\n' + << std::endl; + } + + if (build_config.weighted) { + weights_builder.push_weight_interval(weight_value, weight_length); + weights_builder.finalize(num_kmers); + } + + num_bits nb; + nb.per_absolute_offset = std::ceil(std::log2(strings_offsets_builder.back())); + nb.per_relative_offset = std::ceil(std::log2(max_len - m + 1)); + nb.per_string_id = std::ceil(std::log2(num_sequences)); + + if (build_config.verbose) { + std::cout << "max string length = " << max_len << std::endl; + std::cout << "num bits per_absolute_offset = " << nb.per_absolute_offset << std::endl; + std::cout << "num bits per_relative_offset = " << nb.per_relative_offset << std::endl; + std::cout << "num bits per_string_id = " << nb.per_string_id << std::endl; + } + + if (nb.per_string_id + nb.per_relative_offset > 64) 
{ + throw std::runtime_error("minimizer offset does not fit within 64 bits"); + } + + strings_offsets_builder.set_num_bits(nb); +} + +template +void dictionary_builder::encode_strings(std::string const& filename) // +{ + std::ifstream is(filename.c_str()); + if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); + if (build_config.verbose) std::cout << "reading file '" << filename << "'..." << std::endl; + if (util::ends_with(filename, ".gz")) { + zip_istream zis(is); + if (util::ends_with(filename, ".cf_seg.gz")) { + encode_strings(zis, input_file_t::cf_seg); + } else { + encode_strings(zis, input_file_t::fasta); + } + } else { + if (util::ends_with(filename, ".cf_seg")) { + encode_strings(is, input_file_t::cf_seg); + } else { + encode_strings(is, input_file_t::fasta); + } + } + is.close(); +} + +} // namespace sshash \ No newline at end of file diff --git a/include/builder/parse_file.hpp b/include/builder/parse_file.hpp deleted file mode 100644 index 94114a5..0000000 --- a/include/builder/parse_file.hpp +++ /dev/null @@ -1,350 +0,0 @@ -#pragma once - -#include "external/gz/zip_stream.hpp" -#include "include/minimizer_iterator.hpp" - -namespace sshash { - -template -struct parse_data { - parse_data(build_configuration const& build_config) : num_kmers(0), minimizers(build_config) {} - - uint64_t num_kmers; - minimizers_tuples minimizers; - // compact_string_pool strings; - std::vector pieces; - bits::bit_vector strings; - weights::builder weights_builder; -}; - -template -void parse_file(std::istream& is, parse_data& data, - build_configuration const& build_config) // -{ - essentials::timer_type timer; - timer.start(); - - const uint64_t k = build_config.k; - const uint64_t m = build_config.m; - assert(k > 0 and k >= m); - const uint64_t max_num_kmers_in_super_kmer = k - m + 1; - - if (max_num_kmers_in_super_kmer >= (1ULL << (sizeof(num_kmers_in_super_kmer_uint_type) * 8))) { - throw std::runtime_error( - 
"max_num_kmers_in_super_kmer " + std::to_string(max_num_kmers_in_super_kmer) + - " does not fit into " + std::to_string(sizeof(num_kmers_in_super_kmer_uint_type) * 8) + - " bits"); - } - - /* fit into the wanted number of bits */ - assert(max_num_kmers_in_super_kmer < (1ULL << (sizeof(num_kmers_in_super_kmer_uint_type) * 8))); - - const uint64_t num_bits = 8 * 8 * essentials::GB; // reserve 8 GB of memory - bits::bit_vector::builder bvb_strings; - bvb_strings.reserve(num_bits); - - std::string sequence; - uint64_t num_sequences = 0; - uint64_t num_bases = 0; - - hasher_type hasher(build_config.seed); - minimizer_iterator minimizer_it(k, m, hasher); - minimizer_iterator_rc minimizer_it_rc(k, m, hasher); - uint64_t seq_len = 0; - uint64_t sum_of_weights = 0; - data.weights_builder.init(); - - /* intervals of weights */ - uint64_t weight_value = constants::invalid_uint64; - uint64_t weight_length = 0; - - while (true) // - { - if constexpr (fmt == input_file_type::cf_seg) { - std::getline(is, sequence, '\t'); // skip until and consume '\t' - } else { - static_assert(fmt == input_file_type::fasta); - if (build_config.weighted) { // parse header - std::getline(is, sequence); // header sequence - if (sequence.empty()) break; - - /* - Heder format: - >[id] LN:i:[seq_len] ab:Z:[weight_seq] - where [weight_seq] is a space-separated sequence of integer counters - (the weights), whose length is equal to [seq_len]-k+1. 
- Example: '>12 LN:i:41 ab:Z:2 2 2 2 2 2 2 2 2 2 2' - */ - - expect(sequence[0], '>'); - uint64_t i = 0; - i = sequence.find_first_of(' ', i); - if (i == std::string::npos) throw parse_runtime_error(); - - i += 1; - expect(sequence[i + 0], 'L'); - expect(sequence[i + 1], 'N'); - expect(sequence[i + 2], ':'); - expect(sequence[i + 3], 'i'); - expect(sequence[i + 4], ':'); - i += 5; - uint64_t j = sequence.find_first_of(' ', i); - if (j == std::string::npos) throw parse_runtime_error(); - - seq_len = std::strtoull(sequence.data() + i, nullptr, 10); - i = j + 1; - expect(sequence[i + 0], 'a'); - expect(sequence[i + 1], 'b'); - expect(sequence[i + 2], ':'); - expect(sequence[i + 3], 'Z'); - expect(sequence[i + 4], ':'); - i += 5; - - for (uint64_t j = 0; j != seq_len - k + 1; ++j) { - uint64_t weight = std::strtoull(sequence.data() + i, nullptr, 10); - i = sequence.find_first_of(' ', i) + 1; - data.weights_builder.eat(weight); - sum_of_weights += weight; - if (weight == weight_value) { - weight_length += 1; - } else { - if (weight_value != constants::invalid_uint64) { - data.weights_builder.push_weight_interval(weight_value, weight_length); - } - weight_value = weight; - weight_length = 1; - } - } - } else { - // skip header sequence - is.ignore(std::numeric_limits::max(), '\n'); - } - } - - std::getline(is, sequence); // DNA sequence - - if (is.eof()) break; - - const uint64_t n = sequence.length(); - assert(n >= k); - - ++num_sequences; - if (num_sequences % 100000 == 0) { - std::cout << "read " << num_sequences << " sequences, " << num_bases << " bases, " - << data.num_kmers << " kmers" << std::endl; - } - - assert(bvb_strings.num_bits() % kmer_t::bits_per_char == 0); - data.pieces.push_back(bvb_strings.num_bits() / kmer_t::bits_per_char); - - num_bases += n; - - if (build_config.weighted and seq_len != n) { - std::cout << "ERROR: expected a sequence of length " << seq_len - << " but got one of length " << n << std::endl; - throw std::runtime_error("file is 
malformed"); - } - - data.num_kmers += n - k + 1; - - uint64_t i = 0; - if constexpr (kmer_t::bits_per_char == 2) { -#if !defined(SSHASH_USE_TRADITIONAL_NUCLEOTIDE_ENCODING) and defined(__AVX2__) - - /* process 32 bytes at a time */ - for (; i + 32 <= n; i += 32) { - __m256i v = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(&sequence[i])); - uint64_t word = pack2bits_shift1(v); - bvb_strings.append_bits(word, 64); - } -#endif - } - for (; i < n; ++i) { - bvb_strings.append_bits(kmer_t::char_to_uint(sequence[i]), kmer_t::bits_per_char); - } - } - - /* - So pieces will be of size p+1, where p is the number of DNA sequences - in the input file. - */ - data.pieces.push_back(bvb_strings.num_bits() / kmer_t::bits_per_char); - assert(data.pieces.front() == 0); - - /* Push a final sentinel (dummy) value to avoid bounds' checking in - kmer_iterator::fill_buff(). */ - static_assert(kmer_t::uint_kmer_bits % 64 == 0); - for (int dummy_bits = kmer_t::uint_kmer_bits; dummy_bits; dummy_bits -= 64) { - bvb_strings.append_bits(0, 64); - } - - bvb_strings.build(data.strings); - - assert(data.pieces.front() == 0); - assert(data.pieces.size() == num_sequences + 1); - - timer.stop(); - print_time(timer.elapsed(), data.num_kmers, "step 1.1: 'encoding_input'"); - - std::cout << "read " << num_sequences << " sequences, " << num_bases << " bases, " - << data.num_kmers << " kmers" << std::endl; - std::cout << "num_kmers " << data.num_kmers << std::endl; - std::cout << "cost: 2.0 + " - << static_cast(kmer_t::bits_per_char * num_sequences * (k - 1)) / - data.num_kmers - << " [bits/kmer]" << std::endl; - - /* - The parameter m (minimizer length) should be at least - ceil(log_s(N))+1 - where N is the number of nucleotides in the input and s is the alphabet size. - We warn the user if the used m is less than this lower bound. 
- */ - const uint64_t s = uint64_t(1) << kmer_t::bits_per_char; - const uint64_t lower_bound_on_m = std::ceil(std::log(num_bases) / std::log(s)) + 1; - if (m < lower_bound_on_m) { - std::cout << "\n--> WARNING: using minimizer length " << m - << " but it should be at least ceil(log_" << s << "(" << num_bases - << "))+1 = " << lower_bound_on_m << '\n' - << std::endl; - } - - timer.reset(); - timer.start(); - - const uint64_t num_threads = build_config.num_threads; - const uint64_t num_sequences_per_thread = (num_sequences + num_threads - 1) / num_threads; - std::vector threads; - threads.reserve(num_threads); - - for (uint64_t t = 0; t * num_sequences_per_thread < num_sequences; ++t) // - { - threads.emplace_back([&, t] { - std::vector buffer; - const uint64_t buffer_size = (build_config.ram_limit_in_GiB * essentials::GiB) / - (2 * sizeof(minimizer_tuple) * num_threads); - buffer.reserve(buffer_size); - - auto save = [&](minimizer_info mini_info, - uint64_t num_kmers_in_super_kmer) // - { - assert(num_kmers_in_super_kmer <= max_num_kmers_in_super_kmer); - if (!buffer.empty() and // - buffer.back().minimizer == mini_info.minimizer and // - buffer.back().pos_in_seq == mini_info.pos_in_seq and // - buffer.back().pos_in_kmer == mini_info.pos_in_kmer) // - { - buffer.back().num_kmers_in_super_kmer += num_kmers_in_super_kmer; - return; - } - if (buffer.size() == buffer_size) { - data.minimizers.sort_and_flush(buffer); - buffer.clear(); - } - buffer.emplace_back(mini_info, num_kmers_in_super_kmer); - }; - - const uint64_t index_begin = t * num_sequences_per_thread; - const uint64_t index_end = - std::min(index_begin + num_sequences_per_thread, num_sequences); - - kmer_iterator it(data.strings, k); - minimizer_iterator minimizer_it(k, m, hasher); - minimizer_iterator_rc minimizer_it_rc(k, m, hasher); - - for (uint64_t i = index_begin; i < index_end; ++i) // - { - const uint64_t begin = data.pieces[i]; - const uint64_t end = data.pieces[i + 1]; - const uint64_t sequence_len 
= end - begin; - assert(sequence_len >= k); - - minimizer_info prev_mini_info; - assert(prev_mini_info.minimizer == constants::invalid_uint64); - uint64_t num_kmers_in_super_kmer = 0; - - it.at(kmer_t::bits_per_char * begin); - minimizer_it.set_position(begin); - minimizer_it_rc.set_position(begin); - - for (uint64_t j = 0; j != sequence_len - k + 1; ++j) { - auto uint_kmer = it.get(); - auto mini_info = minimizer_it.next(uint_kmer); - assert(mini_info.pos_in_seq < end - m + 1); - assert(mini_info.pos_in_kmer < k - m + 1); - - if (build_config.canonical) { - auto uint_kmer_rc = uint_kmer; - uint_kmer_rc.reverse_complement_inplace(k); - auto mini_info_rc = minimizer_it_rc.next(uint_kmer_rc); - assert(mini_info_rc.pos_in_seq < end - m + 1); - assert(mini_info_rc.pos_in_kmer < k - m + 1); - if (mini_info_rc.minimizer < mini_info.minimizer) { - mini_info = mini_info_rc; - mini_info.pos_in_kmer = k - m - mini_info.pos_in_kmer; - } - } - - if (prev_mini_info.minimizer == constants::invalid_uint64) { - prev_mini_info = mini_info; - } - - if (mini_info.minimizer != prev_mini_info.minimizer or - mini_info.pos_in_seq != prev_mini_info.pos_in_seq) // - { - save(prev_mini_info, num_kmers_in_super_kmer); - prev_mini_info = mini_info; - num_kmers_in_super_kmer = 0; - } - - num_kmers_in_super_kmer += 1; - it.next(); - } - - save(prev_mini_info, num_kmers_in_super_kmer); - } - - /* flush leftover */ - if (!buffer.empty()) data.minimizers.sort_and_flush(buffer); - }); - } - - for (auto& t : threads) { - if (t.joinable()) t.join(); - } - - timer.stop(); - print_time(timer.elapsed(), data.num_kmers, "step 1.2: 'computing_minimizers_tuples'"); - - if (build_config.weighted) { - std::cout << "sum_of_weights " << sum_of_weights << std::endl; - data.weights_builder.push_weight_interval(weight_value, weight_length); - data.weights_builder.finalize(data.num_kmers); - } -} - -template -void parse_file(std::string const& filename, parse_data& data, - build_configuration const& build_config) 
// -{ - std::ifstream is(filename.c_str()); - if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); - std::cout << "reading file '" << filename << "'..." << std::endl; - if (util::ends_with(filename, ".gz")) { - zip_istream zis(is); - if (util::ends_with(filename, ".cf_seg.gz")) { - parse_file(zis, data, build_config); - } else { - parse_file(zis, data, build_config); - } - } else { - if (util::ends_with(filename, ".cf_seg")) { - parse_file(is, data, build_config); - } else { - parse_file(is, data, build_config); - } - } - is.close(); -} - -} // namespace sshash \ No newline at end of file diff --git a/include/builder/util.hpp b/include/builder/util.hpp index 0b9b2cc..d58ee58 100644 --- a/include/builder/util.hpp +++ b/include/builder/util.hpp @@ -3,22 +3,11 @@ #include #include -#if defined(__AVX2__) -#include -#include -#endif - #include "file_merging_iterator.hpp" #include "parallel_sort.hpp" namespace sshash { -[[maybe_unused]] static void print_time(double time, uint64_t num_kmers, - std::string const& message) { - std::cout << "=== " << message << " " << time / 1000000 << " [sec] (" - << (time * 1000) / num_kmers << " [ns/kmer])" << std::endl; -} - struct parse_runtime_error : public std::runtime_error { parse_runtime_error() : std::runtime_error("did you provide an input file with weights?") {} }; @@ -30,35 +19,6 @@ struct parse_runtime_error : public std::runtime_error { } } -#if defined(__AVX2__) -/* - This function takes 32 bytes and packs the two bits - in positions 1 and 2 (from right) of each byte into - a single 64-bit word. - - This works with the map: - A -> 00; C -> 01; G -> 11; T -> 10. 
-*/ -inline uint64_t pack2bits_shift1(__m256i v) { - // shift >> 1, then mask by 3 to isolate the relevant bits - __m256i shifted = _mm256_srli_epi16(v, 1); - __m256i values = _mm256_and_si256(shifted, _mm256_set1_epi8(3)); - - // collect bit-0 plane - __m256i bit0 = _mm256_slli_epi16(values, 7); - uint32_t mask0 = _mm256_movemask_epi8(bit0); - - // collect bit-1 plane - __m256i bit1 = _mm256_slli_epi16(values, 6); - uint32_t mask1 = _mm256_movemask_epi8(bit1); - - // interleave into the 64-bit result - uint64_t even = _pdep_u64(mask0, 0x5555555555555555ULL); // 010101... - uint64_t odd = _pdep_u64(mask1, 0xAAAAAAAAAAAAAAAAULL); // 101010... - return even | odd; -} -#endif - typedef uint8_t num_kmers_in_super_kmer_uint_type; #pragma pack(push, 2) @@ -74,6 +34,10 @@ struct minimizer_tuple { if (minimizer != other.minimizer) return minimizer > other.minimizer; return pos_in_seq > other.pos_in_seq; } + bool operator<(minimizer_tuple other) const { + if (minimizer != other.minimizer) return minimizer < other.minimizer; + return pos_in_seq < other.pos_in_seq; + } uint64_t minimizer; uint64_t pos_in_seq; @@ -196,8 +160,7 @@ struct minimizers_tuples { , m_num_minimizer_positions(0) , m_num_super_kmers(0) , m_run_identifier(pthash::clock_type::now().time_since_epoch().count()) - , m_num_threads(build_config.num_threads) - , m_tmp_dirname(build_config.tmp_dirname) // + , m_build_config(build_config) // { init(); } @@ -205,14 +168,16 @@ struct minimizers_tuples { void init() { m_num_files_to_merge = 0; } void sort_and_flush(std::vector& buffer) { - parallel_sort(buffer, m_num_threads, + parallel_sort(buffer, m_build_config.num_threads, [](minimizer_tuple const& x, minimizer_tuple const& y) { return (x.minimizer < y.minimizer) or (x.minimizer == y.minimizer and x.pos_in_seq < y.pos_in_seq); }); uint64_t id = m_num_files_to_merge.fetch_add(1); auto tmp_output_filename = get_tmp_output_filename(id); - std::cout << "saving to file '" << tmp_output_filename << "'..." 
<< std::endl; + if (m_build_config.verbose) { + std::cout << "saving to file '" << tmp_output_filename << "'..." << std::endl; + } std::ofstream out(tmp_output_filename.c_str(), std::ofstream::binary); if (!out.is_open()) throw std::runtime_error("cannot open file"); out.write(reinterpret_cast(buffer.data()), @@ -224,7 +189,8 @@ struct minimizers_tuples { std::string get_minimizers_filename() const { assert(m_num_files_to_merge > 0); std::stringstream filename; - filename << m_tmp_dirname << "/sshash.tmp.run_" << m_run_identifier << ".minimizers.bin"; + filename << m_build_config.tmp_dirname << "/sshash.tmp.run_" << m_run_identifier + << ".minimizers.bin"; return filename.str(); } @@ -257,25 +223,25 @@ struct minimizers_tuples { it.has_next(); it.next()) // { auto bucket = it.bucket(); + m_num_minimizers += 1; m_num_minimizer_positions += bucket.size(); m_num_super_kmers += bucket.num_super_kmers(); - ++m_num_minimizers; } input.close(); return; } - std::cout << " == files to merge = " << m_num_files_to_merge << std::endl; - assert(m_num_files_to_merge > 1); - file_merging_iterator fm_iterator(files_name_iterator_begin(), m_num_files_to_merge); - std::cout << "saving tuples to '" << get_minimizers_filename() << "'" << std::endl; std::ofstream out(get_minimizers_filename().c_str()); if (!out.is_open()) throw std::runtime_error("cannot open file"); + if (m_build_config.verbose) { + std::cout << "saving to file '" << get_minimizers_filename() << "'" << std::endl; + } + m_num_minimizers = 0; m_num_minimizer_positions = 0; m_num_super_kmers = 0; @@ -293,8 +259,8 @@ struct minimizers_tuples { out.write(reinterpret_cast(&mt), sizeof(minimizer_tuple)); prev_pos_in_seq = mt.pos_in_seq; ++m_num_super_kmers; - if (m_num_super_kmers % 50000000 == 0) { - std::cout << "num_super_kmers = " << m_num_super_kmers << std::endl; + if (m_build_config.verbose and m_num_super_kmers % 100'000'000 == 0) { + std::cout << "processed " << m_num_super_kmers << " minimizer tuples" << std::endl; 
} fm_iterator.next(); } @@ -302,10 +268,6 @@ struct minimizers_tuples { out.close(); fm_iterator.close(); - std::cout << "num_minimizers = " << m_num_minimizers << std::endl; - std::cout << "num_minimizer_positions = " << m_num_minimizer_positions << std::endl; - std::cout << "num_super_kmers = " << m_num_super_kmers << std::endl; - /* remove tmp files */ for (uint64_t i = 0; i != m_num_files_to_merge; ++i) { auto tmp_output_filename = get_tmp_output_filename(i); @@ -325,15 +287,13 @@ struct minimizers_tuples { uint64_t m_num_minimizers; uint64_t m_num_minimizer_positions; uint64_t m_num_super_kmers; - uint64_t m_run_identifier; - uint64_t m_num_threads; - std::string m_tmp_dirname; + build_configuration m_build_config; std::string get_tmp_output_filename(uint64_t id) const { std::stringstream filename; - filename << m_tmp_dirname << "/sshash.tmp.run_" << m_run_identifier << ".minimizers." << id - << ".bin"; + filename << m_build_config.tmp_dirname << "/sshash.tmp.run_" << m_run_identifier + << ".minimizers." 
<< id << ".bin"; return filename.str(); } }; diff --git a/include/constants.hpp b/include/constants.hpp index abc10fe..3925292 100644 --- a/include/constants.hpp +++ b/include/constants.hpp @@ -1,7 +1,5 @@ #pragma once -#include "kmer.hpp" - namespace sshash::constants { constexpr uint64_t invalid_uint64 = uint64_t(-1); @@ -9,19 +7,20 @@ constexpr uint64_t default_ram_limit_in_GiB = 8; constexpr uint64_t seed = 1; /* for PTHash */ -constexpr double lambda = 7.0; +constexpr double lambda = 5.0; constexpr uint64_t avg_partition_size = 3000000; constexpr uint64_t min_l = 6; -constexpr uint64_t max_l = 12; +constexpr uint64_t max_l = 13; static_assert(min_l < max_l); +static_assert(max_l - min_l + 1 <= 8); static const std::string default_tmp_dirname("."); constexpr int forward_orientation = 1; constexpr int backward_orientation = -1; namespace current_version_number { -constexpr uint8_t x = 4; +constexpr uint8_t x = 5; constexpr uint8_t y = 0; constexpr uint8_t z = 0; } // namespace current_version_number diff --git a/include/dictionary.hpp b/include/dictionary.hpp index 178a2d1..d0fa3db 100644 --- a/include/dictionary.hpp +++ b/include/dictionary.hpp @@ -1,20 +1,26 @@ #pragma once #include "util.hpp" -#include "minimizers.hpp" -#include "buckets.hpp" -#include "skew_index.hpp" +#include "spectrum_preserving_string_set.hpp" +#include "sparse_and_skew_index.hpp" #include "weights.hpp" namespace sshash { -template -struct dictionary { +template +struct dictionary // +{ + using kmer_type = Kmer; + + template + friend struct dictionary_builder; + dictionary() : m_vnum(constants::current_version_number::x, // constants::current_version_number::y, // constants::current_version_number::z) - , m_size(0) + , m_num_kmers(0) + , m_num_strings(0) , m_k(0) , m_m(0) , m_canonical(false) {} @@ -23,45 +29,38 @@ struct dictionary { void build(std::string const& input_filename, build_configuration const& build_config); essentials::version_number vnum() const { return m_vnum; } - 
uint64_t size() const { return m_size; } + uint64_t num_kmers() const { return m_num_kmers; } + uint64_t num_strings() const { return m_num_strings; } uint64_t k() const { return m_k; } uint64_t m() const { return m_m; } - uint64_t num_contigs() const { return m_buckets.pieces.size() - 1; } bool canonical() const { return m_canonical; } bool weighted() const { return !m_weights.empty(); } hasher_type const& hasher() const { return m_hasher; } - /* Lookup queries. Return the kmer_id of the kmer or -1 if it is not found in the dictionary. */ - uint64_t lookup(char const* string_kmer, bool check_reverse_complement = true) const; - uint64_t lookup_uint(kmer_t uint_kmer, bool check_reverse_complement = true) const; + /* Lookup queries. */ + lookup_result lookup(char const* string_kmer, bool check_reverse_complement = true) const; + lookup_result lookup(Kmer uint_kmer, bool check_reverse_complement = true) const; - /* Advanced lookup queries. Return also contig information. */ - lookup_result lookup_advanced(char const* string_kmer, - bool check_reverse_complement = true) const; - lookup_result lookup_advanced_uint(kmer_t uint_kmer, - bool check_reverse_complement = true) const; - - /* Return the number of kmers in contig. Since contigs do not have duplicates, - the length of the contig is always size + k - 1. */ - uint64_t contig_size(uint64_t contig_id) const; + /* Return the number of kmers in string. Since strings do not have duplicates, + the length of the string is always size + k - 1. */ + uint64_t string_size(uint64_t string_id) const; /* Navigational queries. 
*/ - neighbourhood kmer_forward_neighbours(char const* string_kmer, - bool check_reverse_complement = true) const; - neighbourhood kmer_forward_neighbours(kmer_t uint_kmer, - bool check_reverse_complement = true) const; - neighbourhood kmer_backward_neighbours(char const* string_kmer, - bool check_reverse_complement = true) const; - neighbourhood kmer_backward_neighbours(kmer_t uint_kmer, - bool check_reverse_complement = true) const; + neighbourhood kmer_forward_neighbours(char const* string_kmer, + bool check_reverse_complement = true) const; + neighbourhood kmer_forward_neighbours(Kmer uint_kmer, + bool check_reverse_complement = true) const; + neighbourhood kmer_backward_neighbours(char const* string_kmer, + bool check_reverse_complement = true) const; + neighbourhood kmer_backward_neighbours(Kmer uint_kmer, + bool check_reverse_complement = true) const; /* forward and backward */ - neighbourhood kmer_neighbours(char const* string_kmer, - bool check_reverse_complement = true) const; - neighbourhood kmer_neighbours(kmer_t uint_kmer, + neighbourhood kmer_neighbours(char const* string_kmer, + bool check_reverse_complement = true) const; + neighbourhood kmer_neighbours(Kmer uint_kmer, bool check_reverse_complement = true) const; + neighbourhood string_neighbours(uint64_t string_id, bool check_reverse_complement = true) const; - neighbourhood contig_neighbours(uint64_t contig_id, - bool check_reverse_complement = true) const; /* Return the weight of the kmer given its id. */ uint64_t weight(uint64_t kmer_id) const; @@ -71,10 +70,9 @@ struct dictionary { /* Membership queries. */ bool is_member(char const* string_kmer, bool check_reverse_complement = true) const; - bool is_member_uint(kmer_t uint_kmer, bool check_reverse_complement = true) const; + bool is_member(Kmer uint_kmer, bool check_reverse_complement = true) const; - /* Streaming query. 
*/ - template + template friend struct streaming_query; streaming_query_report // @@ -82,47 +80,44 @@ struct dictionary { struct iterator { iterator(dictionary const* ptr, const uint64_t begin_kmer_id, const uint64_t end_kmer_id) { - m_it = ptr->m_buckets.at(begin_kmer_id, end_kmer_id, ptr->m_k); + m_it = ptr->m_spss.at(begin_kmer_id, end_kmer_id); } bool has_next() const { return m_it.has_next(); } - /* (kmer-id, kmer) */ - std::pair next() { return m_it.next(); } + /* (kmer-id, encoded kmer) */ + std::pair next() { return m_it.next(); } private: - typename buckets::iterator m_it; + typename spectrum_preserving_string_set::iterator m_it; }; - iterator begin() const { return iterator(this, 0, size()); } + iterator begin() const { return iterator(this, 0, num_kmers()); } iterator at_kmer_id(const uint64_t kmer_id) const { - assert(kmer_id < size()); - return iterator(this, kmer_id, size()); + assert(kmer_id < num_kmers()); + return iterator(this, kmer_id, num_kmers()); } std::pair // [begin, end) - contig_offsets(const uint64_t contig_id) const { - return m_buckets.contig_offsets(contig_id); + string_offsets(const uint64_t string_id) const { + return m_spss.string_offsets(string_id); } - iterator at_contig_id(const uint64_t contig_id) const { - assert(contig_id < num_contigs()); - auto [begin, end] = contig_offsets(contig_id); - uint64_t contig_length = end - begin; // in bases - assert(contig_length >= m_k); - uint64_t contig_size = contig_length - m_k + 1; // in kmers - uint64_t begin_kmer_id = begin - contig_id * (m_k - 1); - uint64_t end_kmer_id = begin_kmer_id + contig_size; + iterator at_string_id(const uint64_t string_id) const { + assert(string_id < num_strings()); + auto [begin, end] = string_offsets(string_id); + uint64_t string_length = end - begin; // in bases + assert(string_length >= m_k); + uint64_t string_size = string_length - m_k + 1; // in kmers + uint64_t begin_kmer_id = begin - string_id * (m_k - 1); + uint64_t end_kmer_id = begin_kmer_id + 
string_size; return iterator(this, begin_kmer_id, end_kmer_id); } - bits::bit_vector const& strings() const { return m_buckets.strings; } - uint64_t num_bits() const; void print_info() const; void print_space_breakdown() const; - void compute_statistics() const; template void visit(Visitor& visitor) const { @@ -134,50 +129,49 @@ struct dictionary { visit_impl(visitor, *this); } - const buckets& data() const { return m_buckets; } - const minimizers& get_minimizers() const { return m_minimizers; } - private: template static void visit_impl(Visitor& visitor, T&& t) { visitor.visit(t.m_vnum); util::check_version_number(t.m_vnum); - visitor.visit(t.m_size); + visitor.visit(t.m_num_kmers); + visitor.visit(t.m_num_strings); visitor.visit(t.m_k); visitor.visit(t.m_m); visitor.visit(t.m_canonical); visitor.visit(t.m_hasher); - visitor.visit(t.m_minimizers); - visitor.visit(t.m_buckets); - visitor.visit(t.m_skew_index); + visitor.visit(t.m_spss); + visitor.visit(t.m_ssi); visitor.visit(t.m_weights); } essentials::version_number m_vnum; - uint64_t m_size; + uint64_t m_num_kmers; + uint64_t m_num_strings; uint16_t m_k; uint16_t m_m; bool m_canonical; hasher_type m_hasher; - minimizers m_minimizers; - buckets m_buckets; - skew_index m_skew_index; + + spectrum_preserving_string_set m_spss; + sparse_and_skew_index m_ssi; + weights m_weights; - lookup_result lookup_uint_regular(kmer_t uint_kmer) const; - lookup_result lookup_uint_regular(kmer_t uint_kmer, minimizer_info mini_info) const; + lookup_result lookup_regular(Kmer uint_kmer) const; + lookup_result lookup_regular(Kmer uint_kmer, minimizer_info mini_info) const; - lookup_result lookup_uint_canonical(kmer_t uint_kmer) const; - lookup_result lookup_uint_canonical(kmer_t uint_kmer, kmer_t uint_kmer_rc, - minimizer_info mini_info) const; + lookup_result lookup_canonical(Kmer uint_kmer) const; + lookup_result lookup_canonical(Kmer uint_kmer, Kmer uint_kmer_rc, + minimizer_info mini_info) const; - void forward_neighbours(kmer_t 
suffix, neighbourhood& res, + void forward_neighbours(Kmer suffix, neighbourhood& res, bool check_reverse_complement) const; - void backward_neighbours(kmer_t prefix, neighbourhood& res, + void backward_neighbours(Kmer prefix, neighbourhood& res, bool check_reverse_complement) const; - kmer_t get_prefix(kmer_t kmer) const; - kmer_t get_suffix(kmer_t kmer) const; + Kmer get_prefix(Kmer kmer) const; + Kmer get_suffix(Kmer kmer) const; }; } // namespace sshash diff --git a/include/dictionary_types.hpp b/include/dictionary_types.hpp new file mode 100644 index 0000000..ef77ca0 --- /dev/null +++ b/include/dictionary_types.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "dictionary.hpp" +#include "offsets.hpp" +#include "kmer.hpp" + +namespace sshash { + +using dictionary_type = dictionary; +// using dictionary_type = dictionary; + +} // namespace sshash \ No newline at end of file diff --git a/include/hash_util.hpp b/include/hash_util.hpp index 6611c37..d6b4fbc 100644 --- a/include/hash_util.hpp +++ b/include/hash_util.hpp @@ -1,97 +1,91 @@ #pragma once #include "external/pthash/include/pthash.hpp" +#include "external/cityhash/cityhash.cpp" #include "constants.hpp" namespace sshash { -template -struct kmers_pthash_hasher_64 { - typedef pthash::hash64 hash_type; - - /* specialization for kmer_t */ - static inline pthash::hash64 hash(kmer_t x, uint64_t seed) { - uint64_t hash = 0; - for (int i = 0; i < kmer_t::uint_kmer_bits; i += 64) { - uint64_t block = x.pop64(); - hash ^= pthash::MurmurHash2_64(reinterpret_cast(&block), sizeof(block), - seed + i); - } - return hash; +struct minimizers_city_hasher_128 { + typedef pthash::hash128 hash_type; + + static inline pthash::hash128 hash(uint64_t const minimizer, uint64_t seed) { + auto ret = CityMurmur(reinterpret_cast(&minimizer), // + sizeof(minimizer), {seed, ~seed}); + return {ret.first, ret.second}; } }; -template -struct kmers_pthash_hasher_128 { +struct minimizers_xx_hasher_128 { typedef pthash::hash128 hash_type; - /* 
specialization for kmer_t */ - static inline pthash::hash128 hash(kmer_t x, uint64_t seed) { - uint64_t hash_first = 0; - uint64_t hash_second = 0; - for (int i = 0; i < kmer_t::uint_kmer_bits; i += 64) { - uint64_t block = x.pop64(); - hash_first ^= pthash::MurmurHash2_64(reinterpret_cast(&block), - sizeof(block), seed + i); - hash_second ^= pthash::MurmurHash2_64(reinterpret_cast(&block), - sizeof(block), ~seed + i); - } - return {hash_first, hash_second}; + static inline pthash::hash128 hash(uint64_t const minimizer, uint64_t seed) { + /* + Cannot use XXH128 directly because on some processors (e.g., AMD) + it just does not work in Release mode, e.g., when compiling *without* + sanitizers: -fsanitize=address -fno-omit-frame-pointer. + We therefore rely on XXH64 that produces 64-bit hashes but does not + seem to have any issue. + */ + uint8_t const* begin = reinterpret_cast(&minimizer); + uint8_t const* end = begin + sizeof(minimizer); + return {XXH64(begin, end - begin, seed), XXH64(begin, end - begin, ~seed)}; } }; -using minimizers_base_hasher_type = pthash::murmurhash2_128; - -using minimizers_pthash_type = - pthash::partitioned_phf; // search type +// using minimizers_base_hasher_type = minimizers_xx_hasher_128; +using minimizers_base_hasher_type = minimizers_city_hasher_128; -template -using kmers_base_hasher_type = kmers_pthash_hasher_128; +using minimizers_pthash_type = // + pthash::partitioned_phf< // + minimizers_base_hasher_type, // base hasher + pthash::opt_bucketer, // bucketer type + pthash::compact, // encoder type + true // minimal output + >; // -template -using kmers_pthash_type = - pthash::partitioned_phf, // base hasher - pthash::skew_bucketer, // bucketer type - pthash::dictionary_dictionary, // encoder type - true, // minimal output - pthash::pthash_search_type::xor_displacement>; // search type +template +struct kmers_xx_hasher_128 { + typedef pthash::hash128 hash_type; -struct murmurhash2_64 { - murmurhash2_64() { seed(0); } - 
murmurhash2_64(const uint64_t seed) { this->seed(seed); } + static inline pthash::hash128 hash(Kmer const x, uint64_t seed) { + uint8_t const* begin = reinterpret_cast(&(x.bits)); + uint8_t const* end = begin + sizeof(x.bits); + return {XXH64(begin, end - begin, seed), XXH64(begin, end - begin, ~seed)}; + } +}; - void seed(const uint64_t seed) { m_seed = seed; } +template +struct kmers_city_hasher_128 { + typedef pthash::hash128 hash_type; - /* specialization for uint64_t */ - inline uint64_t hash(uint64_t x) const { - return pthash::MurmurHash2_64(reinterpret_cast(&x), sizeof(x), m_seed); + static inline pthash::hash128 hash(Kmer const x, uint64_t seed) { + auto ret = CityMurmur(reinterpret_cast(&(x.bits)), // + sizeof(x.bits), {seed, ~seed}); + return {ret.first, ret.second}; } +}; - template - void visit(Visitor& visitor) const { - visitor.visit(m_seed); - } +// template +// using kmers_base_hasher_type = kmers_xx_hasher_128; - template - void visit(Visitor& visitor) { - visitor.visit(m_seed); - } +template +using kmers_base_hasher_type = kmers_city_hasher_128; -private: - uint64_t m_seed; -}; +template +using kmers_pthash_type = // + pthash::partitioned_phf< // + kmers_base_hasher_type, // base hasher + pthash::opt_bucketer, // bucketer type + pthash::compact, // encoder type + true // minimal output + >; // struct mixer_64 { mixer_64() { seed(0); } mixer_64(const uint64_t seed) { this->seed(seed); } - void seed(const uint64_t seed) { - m_magic = pthash::MurmurHash2_64(reinterpret_cast(&seed), sizeof(seed), 0); - } + void seed(const uint64_t seed) { m_magic = pthash::xxhash_64::hash(seed, 0).first(); } /* specialization for uint64_t */ inline uint64_t hash(uint64_t x) const { return (x * 0x517cc1b727220a95) ^ m_magic; } diff --git a/include/kmer.hpp b/include/kmer.hpp index ea6b061..6fe1520 100644 --- a/include/kmer.hpp +++ b/include/kmer.hpp @@ -3,6 +3,8 @@ // #include "bitpack.hpp" // #include +#include // for uint types +#include #include // template @@ 
-14,34 +16,32 @@ namespace sshash { template struct uint_kmer_t { - using uint_t = Kmer; - Kmer kmer = 0; + Kmer bits = 0; uint_kmer_t() {} - uint_kmer_t(uint64_t kmer) : kmer(kmer) {} + uint_kmer_t(uint64_t bits) : bits(bits) {} virtual ~uint_kmer_t() = default; explicit operator uint64_t() const { if constexpr (std::is_constructible_v) { - return static_cast(kmer); + return static_cast(bits); } else { // std::bitset? - return (kmer & Kmer(uint64_t(-1))).to_ulong(); + return (bits & Kmer(uint64_t(-1))).to_ulong(); } } - // TODO: change to <=> when switching to C++20 - bool operator==(uint_kmer_t const& t) const { return kmer == t.kmer; } - bool operator!=(uint_kmer_t const& t) const { return kmer != t.kmer; } - bool operator<(uint_kmer_t const& t) const { return kmer < t.kmer; } + bool operator==(uint_kmer_t const& t) const { return bits == t.bits; } + bool operator!=(uint_kmer_t const& t) const { return bits != t.bits; } + bool operator<(uint_kmer_t const& t) const { return bits < t.bits; } - void pad(uint16_t b) { kmer <<= b; } + void pad(uint16_t b) { bits <<= b; } void pad_char() { pad(bits_per_char); } - void drop(uint16_t b) { kmer >>= b; } + void drop(uint16_t b) { bits >>= b; } void drop64() { if constexpr (uint_kmer_bits == 64) { - kmer = 0; + bits = 0; } else { drop(64); } @@ -49,8 +49,8 @@ struct uint_kmer_t { void drop_char() { drop(bits_per_char); } void drop_chars(uint16_t k) { drop(k * bits_per_char); } - void take(uint16_t b) { kmer &= ~(~Kmer(0) << b); } - void take64() { kmer &= Kmer(uint64_t(-1)); } + void take(uint16_t b) { bits &= ~(~Kmer(0) << b); } + void take64() { bits &= Kmer(uint64_t(-1)); } void take_char() { take(bits_per_char); } void take_chars(uint16_t k) { take(k * bits_per_char); } @@ -66,31 +66,26 @@ struct uint_kmer_t { return res; } - void append(uint16_t b, uint64_t n) { - assert(b < uint_kmer_bits); - kmer = (kmer << b) | Kmer(n); - } void append64(uint64_t n) { if constexpr (uint_kmer_bits == 64) { - kmer = n; + bits = n; } 
else { - append(64, n); + assert(64 < uint_kmer_bits); + bits = (bits << 64) | Kmer(n); } } - void append_char(uint64_t c) { append(bits_per_char, c); } /* Set the char at position i to c, assuming that the position is empty. */ - void set(uint16_t i, uint64_t c) { kmer |= Kmer(c) << (i * bits_per_char); } + void set(uint16_t i, uint64_t c) { bits |= Kmer(c) << (i * bits_per_char); } /* Returns the char at position i. */ uint64_t at(uint16_t i) const { - return (kmer >> (i * bits_per_char)) & ((uint64_t(1) << bits_per_char) - 1); + return (bits >> (i * bits_per_char)) & ((uint64_t(1) << bits_per_char) - 1); } static constexpr uint16_t uint_kmer_bits = 8 * sizeof(Kmer); static constexpr uint8_t bits_per_char = BitsPerChar; - static_assert(uint_kmer_bits % 64 == 0, "Kmer must use 64*k bits"); static_assert(bits_per_char < 64, "BitsPerChar must be less than 64"); @@ -113,7 +108,7 @@ struct alpha_kmer_t : uint_kmer_t { [[maybe_unused]] virtual void reverse_complement_inplace(uint64_t) {} [[maybe_unused]] static void compute_reverse_complement(char const* input, char* output, uint64_t size) { - for (uint64_t i = 0; i != size; ++i) { output[i] = input[i]; } + for (uint64_t i = 0; i != size; ++i) output[i] = input[i]; } }; @@ -288,15 +283,21 @@ struct aa_uint_kmer_t : alpha_kmer_t { // For proteins, there's no reverse complement, so map each character to itself // This allows streaming_query to work with protein alphabets static constexpr char canonicalize_basepair_reverse_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 0, 0, 0, 0, 0, - 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0, 
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', + 'Z', 0, 0, 0, 0, 0, 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', + 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0}; }; // also supports bitpack<__uint128_t, 1>, std::bitset<256>, etc diff --git a/include/kmer_iterator.hpp b/include/kmer_iterator.hpp index 640e7bc..0c07485 100644 --- a/include/kmer_iterator.hpp +++ b/include/kmer_iterator.hpp @@ -1,19 +1,18 @@ #pragma once -#include "external/pthash/external/bits/include/bit_vector.hpp" #include "util.hpp" namespace sshash { -template +template struct kmer_iterator // { kmer_iterator() {} - kmer_iterator(bits::bit_vector const& bv, const uint64_t k) - : m_bv(&bv), m_uint_kmer_bits(kmer_t::bits_per_char * k), m_pos(0), m_avail(0), m_buff(0) {} + kmer_iterator(BitVector const& bv, const uint64_t k) + : m_bv(&bv), m_uint_kmer_bits(Kmer::bits_per_char * k), 
m_pos(0), m_avail(0), m_buff(0) {} - kmer_iterator(bits::bit_vector const& bv, const uint64_t k, const uint64_t pos) + kmer_iterator(BitVector const& bv, const uint64_t k, const uint64_t pos) : kmer_iterator(bv, k) // { at(pos); @@ -25,38 +24,38 @@ struct kmer_iterator // m_buff = 0; } - kmer_t get() { + Kmer get() { if (m_avail < m_uint_kmer_bits) fill_buff(); auto kmer = m_buff; kmer.take(m_uint_kmer_bits); return kmer; } - kmer_t get_reverse() { + Kmer get_reverse() { if (m_avail < m_uint_kmer_bits) fill_buff_reverse(); auto kmer = m_buff; - kmer.drop(kmer_t::uint_kmer_bits - m_uint_kmer_bits); + kmer.drop(Kmer::uint_kmer_bits - m_uint_kmer_bits); return kmer; } void next() { - if (m_avail < kmer_t::bits_per_char) fill_buff(); + if (m_avail < Kmer::bits_per_char) fill_buff(); m_buff.drop_char(); - m_avail -= kmer_t::bits_per_char; - m_pos += kmer_t::bits_per_char; + m_avail -= Kmer::bits_per_char; + m_pos += Kmer::bits_per_char; } void next_reverse() { - if (m_avail < kmer_t::bits_per_char) fill_buff_reverse(); + if (m_avail < Kmer::bits_per_char) fill_buff_reverse(); m_buff.pad_char(); - m_avail -= kmer_t::bits_per_char; - m_pos -= kmer_t::bits_per_char; + m_avail -= Kmer::bits_per_char; + m_pos -= Kmer::bits_per_char; } inline uint64_t get_next_char() { - if (m_avail < kmer_t::bits_per_char) fill_buff(); - m_avail -= kmer_t::bits_per_char; - m_pos += kmer_t::bits_per_char; + if (m_avail < Kmer::bits_per_char) fill_buff(); + m_avail -= Kmer::bits_per_char; + m_pos += Kmer::bits_per_char; return m_buff.pop_char(); } @@ -64,27 +63,26 @@ struct kmer_iterator // private: inline void fill_buff() { - static_assert(kmer_t::uint_kmer_bits % 64 == 0); - for (int i = kmer_t::uint_kmer_bits - 64; i >= 0; i -= 64) { + static_assert(Kmer::uint_kmer_bits % 64 == 0); + for (int i = Kmer::uint_kmer_bits - 64; i >= 0; i -= 64) { m_buff.append64(m_bv->get_word64(m_pos + i)); } - m_avail = kmer_t::uint_kmer_bits; + m_avail = Kmer::uint_kmer_bits; } inline void fill_buff_reverse() 
{ - static_assert(kmer_t::uint_kmer_bits % 64 == 0); - for (int i = kmer_t::uint_kmer_bits; i > 0; i -= 64) { - m_buff.append64( - m_bv->get_word64(std::max(m_pos, kmer_t::uint_kmer_bits) - i)); + static_assert(Kmer::uint_kmer_bits % 64 == 0); + for (int i = Kmer::uint_kmer_bits; i > 0; i -= 64) { + m_buff.append64(m_bv->get_word64(std::max(m_pos, Kmer::uint_kmer_bits) - i)); } - m_avail = std::min(m_pos, kmer_t::uint_kmer_bits); - m_buff.pad(kmer_t::uint_kmer_bits - m_avail); + m_avail = std::min(m_pos, Kmer::uint_kmer_bits); + m_buff.pad(Kmer::uint_kmer_bits - m_avail); } - bits::bit_vector const* m_bv; + BitVector const* m_bv; uint64_t m_uint_kmer_bits; uint64_t m_pos, m_avail; - kmer_t m_buff; + Kmer m_buff; }; } // namespace sshash \ No newline at end of file diff --git a/include/minimizer_iterator.hpp b/include/minimizer_iterator.hpp index 0672a37..83f6898 100644 --- a/include/minimizer_iterator.hpp +++ b/include/minimizer_iterator.hpp @@ -7,7 +7,7 @@ namespace sshash { /* "Re-scan" method. 
*/ -template +template struct minimizer_iterator { minimizer_iterator() {} @@ -32,14 +32,14 @@ struct minimizer_iterator { m_min_position = m_position - 1; } - minimizer_info next(kmer_t kmer) { + minimizer_info next(Kmer kmer) { if (m_min_pos_in_kmer == 0) { /* min leaves the window: re-scan to compute the new min */ m_position = m_min_position + 1; rescan(kmer); } else { m_position += 1; - kmer_t mmer = kmer; + Kmer mmer = kmer; mmer.drop_chars(m_k - m_m); uint64_t hash = m_hasher.hash(uint64_t(mmer)); if (hash < m_min_hash) { @@ -54,7 +54,7 @@ struct minimizer_iterator { } assert(minimizer_info(m_min_value, m_min_pos_in_kmer) == - util::compute_minimizer(kmer, m_k, m_m, m_hasher)); + util::compute_minimizer(kmer, m_k, m_m, m_hasher)); return {m_min_value, m_min_position, m_min_pos_in_kmer}; } @@ -65,13 +65,13 @@ struct minimizer_iterator { uint64_t m_min_value, m_min_position, m_min_hash; hasher_type m_hasher; - void rescan(kmer_t kmer) { + void rescan(Kmer kmer) { m_min_hash = constants::invalid_uint64; m_min_value = constants::invalid_uint64; m_min_pos_in_kmer = 0; uint64_t begin = m_position; for (uint64_t i = 0; i != m_k - m_m + 1; ++i, ++m_position) { - kmer_t mmer = kmer; + Kmer mmer = kmer; kmer.drop_char(); mmer.take_chars(m_m); uint64_t hash = m_hasher.hash(uint64_t(mmer)); @@ -89,7 +89,7 @@ struct minimizer_iterator { /* "Re-scan" method. 
*/ -template +template struct minimizer_iterator_rc { minimizer_iterator_rc() {} @@ -114,14 +114,14 @@ struct minimizer_iterator_rc { m_min_position = m_position - 1; } - minimizer_info next(kmer_t kmer) { + minimizer_info next(Kmer kmer) { if (m_min_pos_in_kmer == m_k - m_m) { /* min leaves the window: re-scan to compute the new min */ m_position = m_min_position + 1; rescan(kmer); } else { m_position += 1; - kmer_t mmer = kmer; + Kmer mmer = kmer; mmer.take_chars(m_m); uint64_t hash = m_hasher.hash(uint64_t(mmer)); if (hash <= m_min_hash) { @@ -136,7 +136,7 @@ struct minimizer_iterator_rc { } assert(minimizer_info(m_min_value, m_min_pos_in_kmer) == - util::compute_minimizer(kmer, m_k, m_m, m_hasher)); + util::compute_minimizer(kmer, m_k, m_m, m_hasher)); return {m_min_value, m_min_position, m_min_pos_in_kmer}; } @@ -147,13 +147,13 @@ struct minimizer_iterator_rc { uint64_t m_min_value, m_min_position, m_min_hash; hasher_type m_hasher; - void rescan(kmer_t kmer) { + void rescan(Kmer kmer) { m_min_hash = constants::invalid_uint64; m_min_value = constants::invalid_uint64; m_min_pos_in_kmer = 0; uint64_t begin = m_position; for (int64_t i = m_k - m_m; i >= 0; --i, ++m_position) { - kmer_t mmer = kmer; + Kmer mmer = kmer; mmer.drop_chars(i); mmer.take_chars(m_m); uint64_t hash = m_hasher.hash(uint64_t(mmer)); diff --git a/include/minimizers.hpp b/include/minimizers_control_map.hpp similarity index 59% rename from include/minimizers.hpp rename to include/minimizers_control_map.hpp index f9ecf24..914df86 100644 --- a/include/minimizers.hpp +++ b/include/minimizers_control_map.hpp @@ -1,15 +1,15 @@ #pragma once -#include "util.hpp" - namespace sshash { -struct minimizers { +struct minimizers_control_map // +{ template - void build(ForwardIterator begin, uint64_t size, build_configuration const& build_config) // + void build(ForwardIterator begin, const uint64_t size, // + build_configuration const& build_config) // { pthash::build_configuration mphf_build_config; - 
mphf_build_config.lambda = 5.0; + mphf_build_config.lambda = build_config.lambda; mphf_build_config.alpha = 0.94; mphf_build_config.seed = util::get_seed_for_hash_function(build_config); mphf_build_config.verbose = false; @@ -30,29 +30,37 @@ struct minimizers { << std::endl; } - m_mphf.build_in_external_memory(begin, size, mphf_build_config); + mphf.build_in_external_memory(begin, size, mphf_build_config); } - uint64_t lookup(uint64_t uint64_minimizer) const { - uint64_t bucket_id = m_mphf(uint64_minimizer); - return bucket_id; + uint64_t lookup(uint64_t minimizer) const { + uint64_t minimizer_id = mphf(minimizer); + return control_codewords.access(minimizer_id); } - uint64_t size() const { return m_mphf.num_keys(); } - uint64_t num_bits() const { return m_mphf.num_bits(); } + uint64_t size() const { return mphf.num_keys(); } + + uint64_t num_bits() const { return mphf.num_bits() + 8 * control_codewords.num_bytes(); } template void visit(Visitor& visitor) const { - visitor.visit(m_mphf); + visit_impl(visitor, *this); } template void visit(Visitor& visitor) { - visitor.visit(m_mphf); + visit_impl(visitor, *this); } + minimizers_pthash_type mphf; + bits::compact_vector control_codewords; + private: - minimizers_pthash_type m_mphf; + template + static void visit_impl(Visitor& visitor, T&& t) { + visitor.visit(t.mphf); + visitor.visit(t.control_codewords); + } }; -} // namespace sshash +} // namespace sshash \ No newline at end of file diff --git a/include/offsets.hpp b/include/offsets.hpp new file mode 100644 index 0000000..2d84531 --- /dev/null +++ b/include/offsets.hpp @@ -0,0 +1,215 @@ +#pragma once + +#include "external/pthash/external/bits/include/compact_vector.hpp" +#include "external/pthash/external/bits/include/endpoints_sequence.hpp" + +namespace sshash { + +struct num_bits { + num_bits() : per_absolute_offset(0), per_relative_offset(0), per_string_id(0) {} + uint64_t per_absolute_offset; + uint64_t per_relative_offset; + uint64_t per_string_id; +}; + 
+template +struct offsets // +{ + struct builder { + builder() {} + + void reserve(uint64_t n) { m_v.reserve(n); } + void push_back(uint64_t val) { m_v.push_back(val); } + + uint64_t operator[](uint64_t i) { + assert(i < m_v.size()); + return m_v[i]; + } + + uint64_t front() const { return m_v.front(); } + uint64_t back() const { return m_v.back(); } + uint64_t size() const { return m_v.size(); } + + void set_num_bits(num_bits nb) { m_nb = nb; } + + protected: + num_bits m_nb; + std::vector m_v; + }; + + std::pair id_to_offset(const uint64_t kmer_id, const uint64_t k) const // + { + constexpr uint64_t linear_scan_threshold = 32; + uint64_t lo = 0; + uint64_t hi = m_seq.size() - 1; + assert(m_seq.access(0) == 0); + while (hi - lo > linear_scan_threshold) { + uint64_t mid = lo + (hi - lo) / 2; + uint64_t val = m_seq.access(mid); + assert(val >= mid * (k - 1)); + if (kmer_id <= val - mid * (k - 1)) { + hi = mid; + } else { + lo = mid + 1; + } + } + assert(lo < hi); + assert(hi < m_seq.size()); + for (auto it = m_seq.get_iterator_at(lo); lo <= hi; ++lo, ++it) { + uint64_t val = *it - lo * (k - 1); + if (val > kmer_id) break; + } + assert(lo > 0); + return {lo, kmer_id + (lo - 1) * (k - 1)}; + } + + uint64_t access(uint64_t i) const { + assert(i < size()); + return m_seq.access(i); + } + + uint64_t size() const { return m_seq.size(); } + + uint64_t num_bytes() const { + return sizeof(m_num_bits_per_relative_offset) + m_seq.num_bytes(); + } + + struct iterator { + iterator() {} + iterator(offsets const* e, uint64_t pos) { m_it = e->m_seq.get_iterator_at(pos); } + + uint64_t value() const { return *m_it; } + void next() { ++m_it; } + + private: + typename Seq::iterator m_it; + }; + + iterator get_iterator_at(uint64_t pos) const { + assert(pos < size()); + return {this, pos}; + } + + template + void visit(Visitor& visitor) const { + visit_impl(visitor, *this); + } + + template + void visit(Visitor& visitor) { + visit_impl(visitor, *this); + } + +protected: + Seq m_seq; + 
uint64_t m_num_bits_per_relative_offset; + + template + static void visit_impl(Visitor& visitor, T&& t) { + visitor.visit(t.m_num_bits_per_relative_offset); + visitor.visit(t.m_seq); + } +}; + +struct decoded_offsets + : public offsets> // +{ + struct builder + : public offsets>::builder // + { + uint64_t num_bits_per_offset() const { return m_nb.per_absolute_offset; } + + uint64_t encode(uint64_t offset, uint64_t, uint64_t) { return offset; } + + void build(decoded_offsets& e) { + assert(std::is_sorted(m_v.begin(), m_v.end())); + e.m_seq.encode(m_v.begin(), m_v.size(), m_v.back()); + std::vector().swap(m_v); + } + }; + + struct decoded_offset { + uint64_t absolute_offset; + }; + + decoded_offset decode(const uint64_t encoded_offset) const { return {encoded_offset}; } + + void offset_to_id(lookup_result& res, decoded_offset /* p */, const uint64_t k) const // + { + assert(res.kmer_offset != constants::invalid_uint64); + + auto q = m_seq.locate(res.kmer_offset); + res.string_id = q.first.pos; + res.string_begin = q.first.val; + res.string_end = q.second.val; + res.kmer_id = res.kmer_offset - res.string_id * (k - 1); // absolute kmer id + res.kmer_id_in_string = res.kmer_offset - res.string_begin; // relative kmer id + + assert(res.string_id < m_seq.size()); + assert(res.string_begin < res.string_end); + assert(res.kmer_offset >= res.string_id * (k - 1)); + assert(res.string_begin <= res.kmer_offset); + assert(res.kmer_offset < res.string_end); + } +}; + +struct encoded_offsets + : public offsets // +{ + struct builder + : public offsets::builder // + { + uint64_t num_bits_per_offset() const { + return m_nb.per_string_id + m_nb.per_relative_offset; + } + + uint64_t encode(uint64_t offset, uint64_t begin, uint64_t string_id) { + /* encode offset as | string-id | relative offset | */ + assert(string_id < m_v.size()); + assert(offset >= begin); + assert((offset - begin) < (1ULL << m_nb.per_relative_offset)); + uint64_t relative_offset = offset - begin; + return 
(string_id << m_nb.per_relative_offset) + relative_offset; + } + + void build(encoded_offsets& e) { + assert(std::is_sorted(m_v.begin(), m_v.end())); + e.m_seq.build(m_v.begin(), m_v.size(), m_nb.per_absolute_offset); + e.m_num_bits_per_relative_offset = m_nb.per_relative_offset; + std::vector().swap(m_v); + } + }; + + struct decoded_offset { + uint64_t absolute_offset; + uint64_t relative_offset; + uint64_t string_id; + uint64_t string_begin; + uint64_t string_end; + }; + + decoded_offset decode(const uint64_t encoded_offset) const { + uint64_t relative_offset = encoded_offset & ((1ULL << m_num_bits_per_relative_offset) - 1); + uint64_t string_id = encoded_offset >> m_num_bits_per_relative_offset; + assert(string_id + 1 < m_seq.size()); + uint64_t begin = m_seq.access(string_id); + uint64_t end = m_seq.access(string_id + 1); + return {begin + relative_offset, relative_offset, string_id, begin, end}; + } + + void offset_to_id(lookup_result& res, decoded_offset p, const uint64_t k) const // + { + assert(res.kmer_offset != constants::invalid_uint64); + + res.string_id = p.string_id; + res.string_begin = p.string_begin; + res.string_end = p.string_end; + res.kmer_id = res.kmer_offset - res.string_id * (k - 1); // absolute kmer id + res.kmer_id_in_string = res.kmer_offset - res.string_begin; // relative kmer id + + assert(res.string_id < m_seq.size()); + assert(res.string_begin < res.string_end); + } +}; + +} // namespace sshash \ No newline at end of file diff --git a/include/skew_index.hpp b/include/skew_index.hpp deleted file mode 100644 index 4690456..0000000 --- a/include/skew_index.hpp +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -#include "util.hpp" - -namespace sshash { - -template -struct skew_index { - skew_index() : min_log2(constants::min_l), max_log2(constants::max_l), log2_max_bucket_size(0) { - mphfs.resize(0); - positions.resize(0); - } - - /* Returns the number of kmers in the index. 
*/ - uint64_t print_info() const { - uint64_t num_partitions = mphfs.size(); - uint64_t lower = 1ULL << min_log2; - uint64_t upper = 2 * lower; - uint64_t num_kmers_in_skew_index = 0; - for (uint64_t partition_id = 0; partition_id != num_partitions; ++partition_id) { - uint64_t n = mphfs[partition_id].num_keys(); - assert(n == positions[partition_id].size()); - std::cout << "num_kmers belonging to buckets of size > " << lower << " and <= " << upper - << ": " << n << "; "; - std::cout << "bits/kmer = " << static_cast(mphfs[partition_id].num_bits()) / n - << " (mphf) + " << (positions[partition_id].num_bytes() * 8.0) / n - << " (positions)\n"; - num_kmers_in_skew_index += n; - lower = upper; - upper = 2 * lower; - } - return num_kmers_in_skew_index; - } - - bool empty() const { return mphfs.empty(); } - - uint64_t lookup(kmer_t uint_kmer, uint64_t log2_bucket_size) const { - assert(log2_bucket_size >= uint64_t(min_log2 + 1)); - assert(log2_bucket_size <= log2_max_bucket_size); - uint64_t partition_id = log2_bucket_size - (min_log2 + 1); - if (log2_bucket_size == log2_max_bucket_size or log2_bucket_size > max_log2) { - partition_id = mphfs.size() - 1; - } - assert(partition_id < mphfs.size()); - auto const& f = mphfs[partition_id]; - auto const& p = positions[partition_id]; - uint64_t position = p.access(f(uint_kmer)); - return position; - } - - uint64_t num_bits() const { - uint64_t n = (sizeof(min_log2) + sizeof(max_log2) + sizeof(log2_max_bucket_size) + - 2 * sizeof(size_t) /* for std::vector::size */) * - 8; - for (uint64_t partition_id = 0; partition_id != mphfs.size(); ++partition_id) { - auto const& f = mphfs[partition_id]; - auto const& p = positions[partition_id]; - n += f.num_bits() + p.num_bytes() * 8; - } - return n; - } - - template - void visit(Visitor& visitor) const { - visit_impl(visitor, *this); - } - - template - void visit(Visitor& visitor) { - visit_impl(visitor, *this); - } - - uint16_t min_log2; - uint16_t max_log2; - uint32_t 
log2_max_bucket_size; - std::vector> mphfs; - std::vector positions; - -private: - template - static void visit_impl(Visitor& visitor, T&& t) { - visitor.visit(t.min_log2); - visitor.visit(t.max_log2); - visitor.visit(t.log2_max_bucket_size); - visitor.visit(t.mphfs); - visitor.visit(t.positions); - } -}; - -} // namespace sshash diff --git a/include/sparse_and_skew_index.hpp b/include/sparse_and_skew_index.hpp new file mode 100644 index 0000000..d2ede11 --- /dev/null +++ b/include/sparse_and_skew_index.hpp @@ -0,0 +1,173 @@ +#pragma once + +#include "util.hpp" +#include "minimizers_control_map.hpp" + +namespace sshash { + +template +struct skew_index // +{ + skew_index() { + mphfs.resize(0); + positions.resize(0); + } + + /* Returns the number of kmers in the index. */ + uint64_t print_info() const { + uint64_t num_partitions = mphfs.size(); + uint64_t lower = 1ULL << constants::min_l; + uint64_t upper = 2 * lower; + uint64_t num_kmers_in_skew_index = 0; + for (uint64_t partition_id = 0; partition_id != num_partitions; ++partition_id) { + uint64_t n = mphfs[partition_id].num_keys(); + assert(n == positions[partition_id].size()); + std::cout << "num_kmers belonging to buckets of size > " << lower << " and <= " << upper + << ": " << n << "; "; + std::cout << "bits/kmer = " << static_cast(mphfs[partition_id].num_bits()) / n + << " (mphf) + " << (positions[partition_id].num_bytes() * 8.0) / n + << " (positions)\n"; + num_kmers_in_skew_index += n; + lower = upper; + upper = 2 * lower; + } + return num_kmers_in_skew_index; + } + + uint64_t lookup(const Kmer uint_kmer, uint64_t code) const { + code >>= 2; + uint64_t partition_id = code & 7; + uint64_t begin = code >> 3; + assert(partition_id < mphfs.size()); + auto const& f = mphfs[partition_id]; + auto const& p = positions[partition_id]; + uint64_t pos_in_bucket = p.access(f(uint_kmer)); + uint64_t offset = heavy_load_buckets.access(begin + pos_in_bucket); + return offset; + } + + uint64_t num_bits() const { + uint64_t 
n = (2 * sizeof(size_t)) * 8; /* for std::vector::size */ + for (uint64_t partition_id = 0; partition_id != mphfs.size(); ++partition_id) { + auto const& f = mphfs[partition_id]; + auto const& p = positions[partition_id]; + n += f.num_bits() + p.num_bytes() * 8; + } + return n + 8 * heavy_load_buckets.num_bytes(); + } + + template + void visit(Visitor& visitor) const { + visit_impl(visitor, *this); + } + + template + void visit(Visitor& visitor) { + visit_impl(visitor, *this); + } + + std::vector> mphfs; + std::vector positions; + bits::compact_vector heavy_load_buckets; + +private: + template + static void visit_impl(Visitor& visitor, T&& t) { + visitor.visit(t.mphfs); + visitor.visit(t.positions); + visitor.visit(t.heavy_load_buckets); + } +}; + +template +struct sparse_and_skew_index // +{ + struct bucket_iterator { + bucket_iterator(sparse_and_skew_index const* ssi, uint64_t pos, uint64_t size, + bucket_t bucket_type) + : m_size(size) + , m_offset(constants::invalid_uint64) + , m_bucket_type(bucket_type) // + { + assert(size > 0); + if (size == 1) { + m_offset = pos; + } else { + m_it = ssi->mid_load_buckets.get_iterator_at(pos); + } + } + + uint64_t operator*() const { return m_size == 1 ? 
m_offset : *m_it; } + void operator++() { + if (m_size != 1) ++m_it; + } + + uint64_t size() const { return m_size; } + bucket_t bucket_type() const { return m_bucket_type; } + + private: + uint64_t m_size; + uint64_t m_offset; + bucket_t m_bucket_type; + bits::compact_vector::iterator m_it; + }; + + bucket_iterator lookup(const Kmer uint_kmer, const minimizer_info mini_info) const // + { + uint64_t code = codewords.lookup(mini_info.minimizer); + + uint64_t status = code & 1; + if (status == bucket_t::SINGLETON) { // minimizer occurs once + uint64_t offset = code >> 1; + return {this, offset, 1, bucket_t::SINGLETON}; + } + + status = code & 3; + if (status == bucket_t::MIDLOAD) { // minimizer occurs more than once, but is not part of + // the skew index + constexpr uint64_t mask = (uint64_t(1) << constants::min_l) - 1; + code >>= 2; + uint64_t bucket_size = (code & mask) + 2; + uint64_t bucket_id = code >> constants::min_l; + assert(bucket_size < begin_buckets_of_size.size()); + uint64_t begin = begin_buckets_of_size[bucket_size] + bucket_id * bucket_size; + return {this, begin, bucket_size, bucket_t::MIDLOAD}; + } + + assert(status == bucket_t::HEAVYLOAD); // minimizer is part of the skew index + uint64_t offset = ski.lookup(uint_kmer, code); + return {this, offset, 1, bucket_t::HEAVYLOAD}; + } + + uint64_t num_bits() const { + return codewords.num_bits() + + 8 * (essentials::vec_bytes(begin_buckets_of_size) + mid_load_buckets.num_bytes()) + + ski.num_bits(); + } + + template + void visit(Visitor& visitor) const { + visit_impl(visitor, *this); + } + + template + void visit(Visitor& visitor) { + visit_impl(visitor, *this); + } + + minimizers_control_map codewords; + std::vector begin_buckets_of_size; + bits::compact_vector mid_load_buckets; + skew_index ski; + +private: + template + static void visit_impl(Visitor& visitor, T&& t) { + visitor.visit(t.codewords); + visitor.visit(t.begin_buckets_of_size); + visitor.visit(t.mid_load_buckets); + visitor.visit(t.ski); + 
} +}; + +} // namespace sshash \ No newline at end of file diff --git a/include/spectrum_preserving_string_set.hpp b/include/spectrum_preserving_string_set.hpp new file mode 100644 index 0000000..d5aacd2 --- /dev/null +++ b/include/spectrum_preserving_string_set.hpp @@ -0,0 +1,282 @@ +#pragma once + +#include "kmer_iterator.hpp" + +namespace sshash { + +template +struct spectrum_preserving_string_set // +{ + /* Return where the string begins and ends in `strings`. */ + std::pair // [begin, end) + string_offsets(const uint64_t string_id) const { + uint64_t begin = strings_offsets.access(string_id); + uint64_t end = strings_offsets.access(string_id + 1); + assert(end > begin); + return {begin, end}; + } + + Kmer string_prefix(const uint64_t string_id) const { + uint64_t string_begin = strings_offsets.access(string_id); + return util::read_kmer_at(strings, k - 1, Kmer::bits_per_char * string_begin); + } + + Kmer string_suffix(const uint64_t string_id) const { + uint64_t string_end = strings_offsets.access(string_id + 1); + return util::read_kmer_at(strings, k - 1, Kmer::bits_per_char * (string_end - k + 1)); + } + + template + lookup_result lookup_regular(Iterator it, // + const Kmer kmer, // + const minimizer_info mini_info) const // + { + const uint64_t size = it.size(); + assert(size > 0); + + static thread_local // + std::array + v; + + for (uint64_t i = 0; i != size; ++i, ++it) { + uint64_t minimizer_offset = *it; + v[i] = strings_offsets.decode(minimizer_offset); + } + + /* check minimizer first */ + if (uint64_t read_mmer = uint64_t( + util::read_kmer_at(strings, m, Kmer::bits_per_char * v[0].absolute_offset)); + read_mmer != mini_info.minimizer) // + { + /* + The function `lookup_regular` determines if the minimizer is found at the + offset `Kmer::bits_per_char * p.absolute_offset`, not whether the minimizer + does not appear at all. 
In fact, it can happen that the minimizer appear but + not at the specified offset, so it would be wrong to set `res.minimizer_found` + to `false`. This can happen for HEAVYLOAD buckets only because their lookup is + resolved via the skew index and `pos_in_bucket` might be larger than the size + of the bucket (which we do not know for a HEAVYLOAD bucket). Since for streaming + queries we keep track of the presence of minimizers (i.e., whether they appear + in the index or not), only in this special case we set + `res.minimizer_found` to `true` to indicate that we do not know whether the + minimizer appears in the index or not. + */ + return lookup_result(it.bucket_type() != bucket_t::HEAVYLOAD ? false : true); + } + + lookup_result res; + for (uint64_t i = 0; i != size; ++i) { + if (_lookup_regular(res, v[i], kmer, mini_info)) return res; + } + + return lookup_result(); + } + + template + lookup_result lookup_canonical(Iterator it, // + const Kmer kmer, const Kmer kmer_rc, // + const minimizer_info mini_info) const // + { + const uint64_t size = it.size(); + assert(size > 0); + + static thread_local // + std::array + v; + + for (uint64_t i = 0; i != size; ++i, ++it) { + uint64_t minimizer_offset = *it; + v[i] = strings_offsets.decode(minimizer_offset); + } + + /* check minimizer first */ + if (uint64_t read_mmer = uint64_t( + util::read_kmer_at(strings, m, Kmer::bits_per_char * v[0].absolute_offset)); + read_mmer != mini_info.minimizer) // + { + Kmer tmp = mini_info.minimizer; + tmp.reverse_complement_inplace(m); + uint64_t minimizer_rc = uint64_t(tmp); + if (read_mmer != minimizer_rc) { + /* Same note as for the function `lookup_regular`. */ + return lookup_result(it.bucket_type() != bucket_t::HEAVYLOAD ? 
false : true); + } + } + + lookup_result res; + for (uint64_t i = 0; i != size; ++i) { + if (_lookup_canonical(res, v[i], kmer, kmer_rc, mini_info)) return res; + } + + return lookup_result(); + } + + void access(const uint64_t kmer_id, char* string_kmer) const { + auto [_, offset] = strings_offsets.id_to_offset(kmer_id, k); + auto read_kmer = util::read_kmer_at(strings, k, Kmer::bits_per_char * offset); + util::uint_kmer_to_string(read_kmer, string_kmer, k); + } + + struct iterator { + iterator() {} + + iterator(spectrum_preserving_string_set const* ptr, // + const uint64_t begin_kmer_id, const uint64_t end_kmer_id, // [begin,end) + const uint64_t k) + : m_ptr(ptr) + , m_begin_kmer_id(begin_kmer_id) + , m_end_kmer_id(end_kmer_id) + , k(k) + , m_it(ptr->strings, k) // + { + auto [pos, val] = m_ptr->strings_offsets.id_to_offset(m_begin_kmer_id, k); + m_offset = val; + m_strings_offsets_it = m_ptr->strings_offsets.get_iterator_at(pos); + assert(m_strings_offsets_it.value() > m_offset); + next_piece(); + } + + bool has_next() const { return m_begin_kmer_id != m_end_kmer_id; } + + std::pair // (kmer-id, encoded kmer) + next() { + if (m_offset == m_next_offset - k + 1) { + m_offset = m_next_offset; + next_piece(); + } + m_ret.first = m_begin_kmer_id; + if (m_clear) { + m_ret.second = m_it.get(); + assert(Kmer::bits_per_char * m_offset == m_it.position()); + m_it.at(Kmer::bits_per_char * (m_offset + k)); + } else { + m_ret.second.drop_char(); + m_ret.second.set(k - 1, m_it.get_next_char()); + } + m_clear = false; + ++m_begin_kmer_id; + ++m_offset; + return m_ret; + } + + private: + std::pair m_ret; + spectrum_preserving_string_set const* m_ptr; + uint64_t m_begin_kmer_id, m_end_kmer_id; + uint64_t k; + uint64_t m_offset, m_next_offset; + kmer_iterator m_it; + typename Offsets::iterator m_strings_offsets_it; + bool m_clear; + + void next_piece() { + m_it.at(Kmer::bits_per_char * m_offset); + m_next_offset = m_strings_offsets_it.value(); + assert(m_next_offset > m_offset); 
+ m_clear = true; + m_strings_offsets_it.next(); + } + }; + + iterator at(const uint64_t begin_kmer_id, const uint64_t end_kmer_id) const { + return iterator(this, begin_kmer_id, end_kmer_id, k); + } + + uint64_t num_bits() const { + return 8 * (sizeof(k) + sizeof(m) + strings_offsets.num_bytes() + strings.num_bytes()); + } + + template + void visit(Visitor& visitor) const { + visit_impl(visitor, *this); + } + + template + void visit(Visitor& visitor) { + visit_impl(visitor, *this); + } + + uint16_t k; + uint16_t m; + Offsets strings_offsets; + bits::bit_vector strings; + +private: + template + static void visit_impl(Visitor& visitor, T&& t) { + visitor.visit(t.k); + visitor.visit(t.m); + visitor.visit(t.strings_offsets); + visitor.visit(t.strings); + } + + bool _lookup_regular(lookup_result& res, // + typename Offsets::decoded_offset p, // + const Kmer kmer, // + const minimizer_info mini_info) const // + { + if (p.absolute_offset < mini_info.pos_in_kmer) return false; + + res.kmer_offset = p.absolute_offset - mini_info.pos_in_kmer; + + if (res.kmer_offset >= res.string_begin and res.kmer_offset < res.string_end) { + res.kmer_id = res.kmer_offset - res.string_id * (k - 1); // absolute kmer id + res.kmer_id_in_string = res.kmer_offset - res.string_begin; // relative kmer id + } else { + strings_offsets.offset_to_id(res, p, k); + } + + if (res.kmer_offset >= res.string_begin and res.kmer_offset < res.string_end - k + 1 and // + kmer == util::read_kmer_at(strings, k, Kmer::bits_per_char * res.kmer_offset)) // + { + return true; + } + + return false; + } + + bool _lookup_canonical(lookup_result& res, // + typename Offsets::decoded_offset p, // + const Kmer kmer, // + const Kmer kmer_rc, // + const minimizer_info mini_info) const // + { + uint64_t pos_in_kmer = mini_info.pos_in_kmer; + if (__lookup_canonical(res, p, kmer, kmer_rc, pos_in_kmer)) return true; + pos_in_kmer = k - m - mini_info.pos_in_kmer; + return __lookup_canonical(res, p, kmer, kmer_rc, pos_in_kmer); + 
} + + bool __lookup_canonical(lookup_result& res, // + typename Offsets::decoded_offset p, // + const Kmer kmer, // + const Kmer kmer_rc, // + const uint64_t pos_in_kmer) const // + { + if (p.absolute_offset < pos_in_kmer) return false; + + res.kmer_offset = p.absolute_offset - pos_in_kmer; + + if (res.kmer_offset >= res.string_begin and res.kmer_offset < res.string_end) { + res.kmer_id = res.kmer_offset - res.string_id * (k - 1); // absolute kmer id + res.kmer_id_in_string = res.kmer_offset - res.string_begin; // relative kmer id + } else { + strings_offsets.offset_to_id(res, p, k); + } + + if (res.kmer_offset >= res.string_begin and res.kmer_offset < res.string_end - k + 1) // + { + auto read_kmer = + util::read_kmer_at(strings, k, Kmer::bits_per_char * res.kmer_offset); + if (read_kmer == kmer) return true; + if (read_kmer == kmer_rc) { + res.kmer_orientation = constants::backward_orientation; + return true; + } + } + + return false; + } +}; + +} // namespace sshash \ No newline at end of file diff --git a/include/streaming_query.hpp b/include/streaming_query.hpp index 163cbd4..3f1e1ab 100644 --- a/include/streaming_query.hpp +++ b/include/streaming_query.hpp @@ -6,9 +6,12 @@ namespace sshash { -template -struct streaming_query { - streaming_query(dictionary const* dict) +template +struct streaming_query // +{ + using kmer_t = typename Dict::kmer_type; + + streaming_query(Dict const* dict) : m_dict(dict) @@ -25,8 +28,8 @@ struct streaming_query { , m_curr_mini_info_rc() , m_prev_mini_info_rc() - , m_it(dict->m_buckets.strings, m_k) - , m_remaining_contig_bases(0) + , m_it(dict->m_spss.strings, m_k) + , m_remaining_string_bases(0) , m_num_searches(0) , m_num_extensions(0) @@ -44,13 +47,13 @@ struct streaming_query { void reset() { m_start = true; - m_remaining_contig_bases = 0; + m_remaining_string_bases = 0; m_res = lookup_result(); m_minimizer_it.reset(); m_minimizer_it_rc.reset(); } - lookup_result lookup_advanced(char const* kmer) // + lookup_result 
lookup(char const* kmer) // { /* 1. validation */ bool is_valid = @@ -80,7 +83,7 @@ struct streaming_query { m_curr_mini_info_rc = m_minimizer_it_rc.next(m_kmer_rc); /* 3. compute result */ - if (m_remaining_contig_bases == 0) { + if (m_remaining_string_bases == 0) { seed(); } else { auto expected_kmer = (m_res.kmer_orientation == constants::forward_orientation) @@ -89,8 +92,8 @@ struct streaming_query { if ((expected_kmer == m_kmer) or (expected_kmer == m_kmer_rc)) { ++m_num_extensions; m_res.kmer_id += m_res.kmer_orientation; - m_res.kmer_id_in_contig += m_res.kmer_orientation; - m_remaining_contig_bases -= 1; + m_res.kmer_id_in_string += m_res.kmer_orientation; + m_remaining_string_bases -= 1; } else { seed(); } @@ -101,7 +104,7 @@ struct streaming_query { m_prev_mini_info_rc = m_curr_mini_info_rc; m_start = false; - assert(equal_lookup_result(m_dict->lookup_advanced(kmer), m_res)); + assert(equal_lookup_result(m_dict->lookup(kmer), m_res)); return m_res; } @@ -112,7 +115,7 @@ struct streaming_query { uint64_t num_invalid_lookups() const { return m_num_invalid; } private: - dictionary const* m_dict; + Dict const* m_dict; /* result */ lookup_result m_res; @@ -129,8 +132,8 @@ struct streaming_query { minimizer_info m_curr_mini_info_rc, m_prev_mini_info_rc; /* string state */ - kmer_iterator m_it; - uint64_t m_remaining_contig_bases; + kmer_iterator m_it; + uint64_t m_remaining_string_bases; /* performance counts */ uint64_t m_num_searches; @@ -140,7 +143,7 @@ struct streaming_query { void seed() // { - m_remaining_contig_bases = 0; + m_remaining_string_bases = 0; /* if minimizer does not change and previous minimizer was not found, surely any kmer having the same minimizer cannot be found as well */ @@ -155,21 +158,21 @@ struct streaming_query { if constexpr (canonical) { if (m_curr_mini_info.minimizer < m_curr_mini_info_rc.minimizer) { - m_res = m_dict->lookup_uint_canonical(m_kmer, m_kmer_rc, m_curr_mini_info); + m_res = m_dict->lookup_canonical(m_kmer, 
m_kmer_rc, m_curr_mini_info); } else if (m_curr_mini_info_rc.minimizer < m_curr_mini_info.minimizer) { - m_res = m_dict->lookup_uint_canonical(m_kmer, m_kmer_rc, m_curr_mini_info_rc); + m_res = m_dict->lookup_canonical(m_kmer, m_kmer_rc, m_curr_mini_info_rc); } else { - m_res = m_dict->lookup_uint_canonical(m_kmer, m_kmer_rc, m_curr_mini_info); + m_res = m_dict->lookup_canonical(m_kmer, m_kmer_rc, m_curr_mini_info); if (m_res.kmer_id == constants::invalid_uint64) { - m_res = m_dict->lookup_uint_canonical(m_kmer, m_kmer_rc, m_curr_mini_info_rc); + m_res = m_dict->lookup_canonical(m_kmer, m_kmer_rc, m_curr_mini_info_rc); } } } else { - m_res = m_dict->lookup_uint_regular(m_kmer, m_curr_mini_info); + m_res = m_dict->lookup_regular(m_kmer, m_curr_mini_info); bool minimizer_found = m_res.minimizer_found; if (m_res.kmer_id == constants::invalid_uint64) { assert(m_res.kmer_orientation == constants::forward_orientation); - m_res = m_dict->lookup_uint_regular(m_kmer_rc, m_curr_mini_info_rc); + m_res = m_dict->lookup_regular(m_kmer_rc, m_curr_mini_info_rc); m_res.kmer_orientation = constants::backward_orientation; bool minimizer_rc_found = m_res.minimizer_found; m_res.minimizer_found = minimizer_rc_found or minimizer_found; @@ -183,11 +186,12 @@ struct streaming_query { assert(m_res.minimizer_found == true); m_num_searches += 1; - uint64_t kmer_offset = 2 * (m_res.kmer_id + m_res.contig_id * (m_k - 1)); - m_remaining_contig_bases = (m_res.contig_size - 1) - m_res.kmer_id_in_contig; + uint64_t kmer_offset = 2 * (m_res.kmer_id + m_res.string_id * (m_k - 1)); + m_remaining_string_bases = + (m_res.string_end - m_res.string_begin - m_k) - m_res.kmer_id_in_string; if (m_res.kmer_orientation == constants::backward_orientation) { kmer_offset += 2 * m_k; - m_remaining_contig_bases = m_res.kmer_id_in_contig; + m_remaining_string_bases = m_res.kmer_id_in_string; } m_it.at(kmer_offset); } diff --git a/include/util.hpp b/include/util.hpp index e83fe8d..bf9bebd 100644 --- 
a/include/util.hpp +++ b/include/util.hpp @@ -10,7 +10,13 @@ namespace sshash { -enum input_file_type { fasta, cf_seg }; +enum bucket_t : int { + SINGLETON = 0, // minimizer appears only once + MIDLOAD = 1, // minimizer appears > 1 but < 2^l times + HEAVYLOAD = 3 // minimizer appears >= 2^l times +}; + +enum input_file_t { fasta, cf_seg }; struct streaming_query_report { streaming_query_report() @@ -30,36 +36,41 @@ struct streaming_query_report { }; struct lookup_result { - lookup_result() + lookup_result(bool mf = true) : kmer_id(constants::invalid_uint64) - , kmer_id_in_contig(constants::invalid_uint64) + , kmer_id_in_string(constants::invalid_uint64) + , kmer_offset(constants::invalid_uint64) , kmer_orientation(constants::forward_orientation) - , contig_id(constants::invalid_uint64) - , contig_size(constants::invalid_uint64) - , minimizer_found(true) {} + + , string_id(constants::invalid_uint64) + , string_begin(constants::invalid_uint64) + , string_end(constants::invalid_uint64) + + , minimizer_found(mf) {} uint64_t kmer_id; // "absolute" kmer-id - uint64_t kmer_id_in_contig; // "relative" kmer-id: 0 <= kmer_id_in_contig < contig_size + uint64_t kmer_id_in_string; // "relative" kmer-id: 0 <= kmer_id_in_string < string_size, + // where string_size = string_end - string_begin - k + 1 + uint64_t kmer_offset; int64_t kmer_orientation; - uint64_t contig_id; - uint64_t contig_size; - bool minimizer_found; - uint64_t contig_begin(const uint64_t k) const { // - return kmer_id + contig_id * (k - 1) - kmer_id_in_contig; - } + uint64_t string_id; + uint64_t string_begin; + uint64_t string_end; - uint64_t contig_end(const uint64_t k) const { // - return contig_begin(k) + contig_size + k - 1; - } + bool minimizer_found; }; inline std::ostream& operator<<(std::ostream& os, lookup_result const& res) { os << " == kmer_id = " << res.kmer_id << '\n'; - os << " == kmer_id_in_contig = " << res.kmer_id_in_contig << '\n'; + os << " == kmer_id_in_string = " << res.kmer_id_in_string 
<< '\n'; + os << " == kmer_offset = " << res.kmer_offset << '\n'; os << " == kmer_orientation = " << res.kmer_orientation << '\n'; - os << " == contig_id = " << res.contig_id << '\n'; - os << " == contig_size = " << res.contig_size << '\n'; + os << " == string_id = " << res.string_id << '\n'; + os << " == string_begin = " << res.string_begin << '\n'; + os << " == string_end = " << res.string_end << '\n'; + os << " == string_length = " << (res.string_end - res.string_begin) << '\n'; + os << " == minimizer_found = " << (res.minimizer_found ? "true" : "false") << '\n'; return os; } @@ -100,9 +111,9 @@ struct minimizer_info { << std::endl; good = false; } - if (expected.kmer_id_in_contig != got.kmer_id_in_contig) { - std::cout << "expected kmer_id_in_contig " << expected.kmer_id_in_contig << " but got " - << got.kmer_id_in_contig << std::endl; + if (expected.kmer_id_in_string != got.kmer_id_in_string) { + std::cout << "expected kmer_id_in_string " << expected.kmer_id_in_string << " but got " + << got.kmer_id_in_string << std::endl; good = false; } if (got.kmer_id != constants::invalid_uint64 and @@ -111,14 +122,19 @@ struct minimizer_info { << got.kmer_orientation << std::endl; good = false; } - if (expected.contig_id != got.contig_id) { - std::cout << "expected contig_id " << expected.contig_id << " but got " << got.contig_id + if (expected.string_id != got.string_id) { + std::cout << "expected string_id " << expected.string_id << " but got " << got.string_id << std::endl; good = false; } - if (expected.contig_size != got.contig_size) { - std::cout << "expected contig_size " << expected.contig_size << " but got " - << got.contig_size << std::endl; + if (expected.string_begin != got.string_begin) { + std::cout << "expected string_begin " << expected.string_begin << " but got " + << got.string_begin << std::endl; + good = false; + } + if (expected.string_end != got.string_end) { + std::cout << "expected string_end " << expected.string_end << " but got " << 
got.string_end + << std::endl; good = false; } return good; @@ -127,12 +143,11 @@ struct minimizer_info { struct build_configuration { build_configuration() : k(31) - , m(17) + , m(20) , seed(constants::seed) , num_threads(1) , ram_limit_in_GiB(constants::default_ram_limit_in_GiB) - , l(constants::min_l) , lambda(constants::lambda) , canonical(false) @@ -143,13 +158,12 @@ struct build_configuration { {} - uint64_t k; // kmer size - uint64_t m; // minimizer size + uint64_t k; // kmer length + uint64_t m; // minimizer length uint64_t seed; uint64_t num_threads; uint64_t ram_limit_in_GiB; - uint64_t l; // drive dictionary trade-off double lambda; // drive PTHash trade-off bool canonical; @@ -159,12 +173,16 @@ struct build_configuration { std::string tmp_dirname; void print() const { - std::cout << "k = " << k << ", m = " << m << ", seed = " << seed - << ", num_threads = " << num_threads - << ", ram_limit_in_GiB = " << ram_limit_in_GiB << ", l = " << l - << ", lambda = " << lambda << ", canonical = " << (canonical ? "true" : "false") - << ", weighted = " << (weighted ? "true" : "false") - << ", verbose = " << (verbose ? "true" : "false") << std::endl; + std::cout << "k = " << k // + << ", m = " << m // + << ", seed = " << seed // + << ", num_threads = " << num_threads // + << ", ram_limit_in_GiB = " << ram_limit_in_GiB // + << ", lambda = " << lambda // + << ", canonical = " << (canonical ? "true" : "false") // + << ", weighted = " << (weighted ? "true" : "false") // + << ", verbose = " << (verbose ? 
"true" : "false") // + << ", tmp_dirname = '" << tmp_dirname << "'" << std::endl; // } }; @@ -190,7 +208,7 @@ template [[maybe_unused]] static kmer_t string_to_uint_kmer(char const* str, uint64_t k) { assert(k <= kmer_t::max_k); kmer_t x = 0; - for (int i = k - 1; i >= 0; i--) x.append_char(kmer_t::char_to_uint(str[i])); + for (uint64_t i = 0; i != k; ++i) x.set(i, kmer_t::char_to_uint(str[i])); return x; } @@ -266,67 +284,23 @@ minimizer_info compute_minimizer(kmer_t kmer, const uint64_t k, const uint64_t m } // namespace util -// taken from tlx -static inline std::istream& appendline(std::istream& is, std::string& str, char delim = '\n') { - size_t size = str.size(); - size_t capacity = str.capacity(); - std::streamsize rest = capacity - size; - - if (rest == 0) { - // if rest is zero, already expand string - capacity = std::max(static_cast(8), capacity * 2); - rest = capacity - size; - } - - // give getline access to all of capacity - str.resize(capacity); - - // get until delim or rest is filled - is.getline(const_cast(str.data()) + size, rest, delim); - - // gcount includes the delimiter - size_t new_size = size + is.gcount(); - - // is failbit set? 
- if (!is) { - // if string ran out of space, expand, and retry - if (is.gcount() + 1 == rest) { - is.clear(); - str.resize(new_size); - str.reserve(capacity * 2); - return appendline(is, str, delim); - } - // else fall through and deliver error - } else if (!is.eof()) { - // subtract delimiter - --new_size; - } - - // resize string to fit its contents - str.resize(new_size); - return is; -} - struct buffered_lines_iterator { static const uint64_t BUFFER_SIZE = 1024; buffered_lines_iterator(std::istream& is, uint64_t buffer_size = BUFFER_SIZE) : m_is(is), m_buffer_size(buffer_size), m_read_chars(0) {} - bool fill_buffer(std::string& buffer, - bool force = false /* force reading of m_buffer_size characters */ - ) { + bool fill_buffer(std::string& buffer) { + if (buffer.size() >= m_buffer_size) return false; + bool empty_line_was_read = false; uint64_t size = buffer.size(); - uint64_t target_size = size + m_buffer_size; - if (force) target_size += m_buffer_size; - - buffer.resize(target_size); + buffer.resize(m_buffer_size); char* ptr = buffer.data() + size; - while (size != target_size) { + while (size < m_buffer_size) { // read until '\n' or rest is filled - uint64_t rest = target_size - size; + uint64_t rest = m_buffer_size - size; m_is.getline(ptr, rest, '\n'); uint64_t read_chars = m_is.gcount(); m_read_chars += read_chars; diff --git a/script/bench.py b/script/bench.py new file mode 100644 index 0000000..2b334e3 --- /dev/null +++ b/script/bench.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +import os +import subprocess +import sys +from pathlib import Path + +# ------------------------------ +# Argument parsing +# ------------------------------ +if len(sys.argv) < 3: + print("Usage: python3 bench.py ") + sys.exit(1) + +log_label = sys.argv[1] +index_dir = Path(sys.argv[2]).resolve() + +# ------------------------------ +# Global configuration +# ------------------------------ +results_dir = Path(f"results-{log_label}") + +datasets = [ + "cod", "kestrel", "human", 
"ncbi-virus", "se", "hprc" +] + +# ------------------------------ +# Utility functions +# ------------------------------ +def run_cmd(cmd, cwd=None): + """Run a shell command and print it.""" + print(f"[RUN] {' '.join(cmd)}") + subprocess.run(cmd, cwd=cwd, check=True) + + +def build_project(max_k63: bool): + """Run cmake + make with max_k63 = True/False.""" + flag = "On" if max_k63 else "Off" + print(f"\n=== Building SSHASH (MAX_KMER_LENGTH_63={flag}) ===\n") + run_cmd([ + "cmake", "..", + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_CXX_COMPILER=/usr/bin/g++", + "-DSSHASH_USE_ARCH_NATIVE=On", + "-DSSHASH_USE_SANITIZERS=Off", + f"-DSSHASH_USE_MAX_KMER_LENGTH_63={flag}" + ]) + run_cmd(["make", "-j"]) + + +def run_bench(k, canonical, runs = 3): + """Run SSHASH benchmark for all datasets.""" + mode = "canon" if canonical else "regular" + out_dir = results_dir / f"k{k}" + out_dir.mkdir(parents=True, exist_ok=True) + log_file = out_dir / f"{mode}-bench.log" + json_file = out_dir / f"{mode}-bench.json" + + for dataset in datasets: + suffix = f".k{k}.canon.sshash" if canonical else f".k{k}.sshash" + index_path = index_dir / f"{dataset}{suffix}" + + print(f"\n>>> Benchmarking {dataset} (k={k}, mode={mode})\n") + for i in range(runs): + print(f" ==> run {i+1}/{runs}") + cmd = ["./sshash", "bench", "-i", str(index_path)] + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + +# ------------------------------ +# Prepare directories +# ------------------------------ +results_dir.mkdir(parents=True, exist_ok=True) +(index_dir).mkdir(parents=True, exist_ok=True) + +(results_dir / "k31").mkdir(exist_ok=True) +(results_dir / "k63").mkdir(exist_ok=True) + +# ------------------------------ +# Run benchmarks +# ------------------------------ + +# --- Build for k=31 --- +build_project(max_k63=False) +run_bench(31, False) +run_bench(31, True) + +# --- Build for k=63 --- 
+build_project(max_k63=True) +run_bench(63, False) +run_bench(63, True) + +# --- Restore to default --- +build_project(max_k63=False) + +print("\n All SSHash benchmark runs completed successfully. \n") diff --git a/script/bench.sh b/script/bench.sh deleted file mode 100644 index f8f519d..0000000 --- a/script/bench.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -echo "output log file =" $1 - -### regular indexes - -./sshash bench -i cod.k31.sshash >> $1.regular.bench_log -# ./sshash bench -i cod.k63.sshash >> $1.regular.bench_log - -./sshash bench -i kestrel.k31.sshash >> $1.regular.bench_log -# ./sshash bench -i kestrel.k63.sshash >> $1.regular.bench_log - -./sshash bench -i human.k31.sshash >> $1.regular.bench_log -# ./sshash bench -i human.k63.sshash >> $1.regular.bench_log - -### canonical indexes - -./sshash bench -i cod.k31.canon.sshash >> $1.canon.bench_log -# ./sshash bench -i cod.k63.canon.sshash >> $1.canon.bench_log - -./sshash bench -i kestrel.k31.canon.sshash >> $1.canon.bench_log -# ./sshash bench -i kestrel.k63.canon.sshash >> $1.canon.bench_log - -./sshash bench -i human.k31.canon.sshash >> $1.canon.bench_log -# ./sshash bench -i human.k63.canon.sshash >> $1.canon.bench_log \ No newline at end of file diff --git a/script/build.py b/script/build.py new file mode 100644 index 0000000..c80973b --- /dev/null +++ b/script/build.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import os +import subprocess +import sys +from pathlib import Path + +if len(sys.argv) < 4: + print("Usage: python3 build.py ") + sys.exit(1) + +log_label = sys.argv[1] +datasets_dir = Path(sys.argv[2]).resolve() +index_dir = Path(sys.argv[3]).resolve() +tmp_dir = datasets_dir / "tmp_dir" +results_dir = Path(f"results-{log_label}") +threads = 64 +g = 16 + +datasets = [ + "cod", "kestrel", "human", "ncbi-virus", "se", "hprc" +] + +m_values_k31 = { + "cod": 20, "kestrel": 20, "human": 21, "ncbi-virus": 19, "se": 21, "hprc": 21 +} + +m_values_k63 = { + "cod": 24, "kestrel": 24, 
"human": 25, "ncbi-virus": 23, "se": 31, "hprc": 31 +} + +# --- Utilities --- +def run_cmd(cmd, cwd=None, append_to=None): + print(f"[RUN] {' '.join(cmd)}") + if append_to: + with open(append_to, "a") as f: + subprocess.run(cmd, cwd=cwd, stdout=f, stderr=f, check=True) + else: + subprocess.run(cmd, cwd=cwd, check=True) + + +def build_project(max_k63: bool): + flag = "On" if max_k63 else "Off" + print(f"\n=== Building SSHASH (MAX_KMER_LENGTH_63={flag}) ===\n") + run_cmd([ + "cmake", "..", + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_CXX_COMPILER=/usr/bin/g++", + "-DSSHASH_USE_ARCH_NATIVE=On", + "-DSSHASH_USE_SANITIZERS=Off", + f"-DSSHASH_USE_MAX_KMER_LENGTH_63={flag}" + ]) + run_cmd(["make", "-j"]) + + +def build_sshash(k, canonical, m_values): + mode_dir = results_dir / f"k{k}" + mode_dir.mkdir(parents=True, exist_ok=True) + + mode = "canon" if canonical else "regular" + log_file = mode_dir / f"{mode}-build.log" + json_file = mode_dir / f"{mode}-build.json" + time_file = mode_dir / f"{mode}-build.time.log" + + for dataset in datasets: + m_val = m_values[dataset] + input_file = datasets_dir / f"{dataset}.k{k}.eulertigs.fa.gz" + output_file = index_dir / f"{dataset}.k{k}" + if canonical: + output_file = str(output_file) + ".canon" + + print(f"\n>>> Building {dataset} (k={k}, m={m_val}, mode={mode})\n") + + # Clean tmp directory (should be empty after each build anyway) + subprocess.run(f"rm -rf {tmp_dir}/*", shell=True, check=True) + + cmd = [ + "/usr/bin/time", "-v", "-a", "-o", str(time_file), + "./sshash", "build", + "-i", str(input_file), + "-k", str(k), + "-m", str(m_val), + "-g", str(g), + "-t", str(threads), + "--verbose", + "-d", str(tmp_dir), + "-o", f"{output_file}.sshash" + ] + if canonical: + cmd.append("--canonical") + + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + + +# --- Main pipeline --- +index_dir.mkdir(parents=True, exist_ok=True) 
+results_dir.mkdir(parents=True, exist_ok=True) + +# k = 31 +build_project(max_k63=False) +build_sshash(31, False, m_values_k31) +build_sshash(31, True, m_values_k31) + +# k = 63 +build_project(max_k63=True) +build_sshash(63, False, m_values_k63) +build_sshash(63, True, m_values_k63) + +# rebuild back to default +build_project(max_k63=False) + +print("\n All SSHash indexes built successfully. \n") diff --git a/script/build.sh b/script/build.sh deleted file mode 100644 index 48db484..0000000 --- a/script/build.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - - -echo "output log file =" $1 - -### regular indexes - -./sshash build -i ~/sshash_datasets/cod.k31.unitigs.fa.ust.fa.gz -k 31 -m 20 -t 8 -g 16 --verbose -o cod.k31.sshash -d tmp_dir >> $1.regular.build_log -# ./sshash build -i ~/sshash_datasets/cod.k63.unitigs.fa.ust.fa.gz -k 63 -m 24 -t 8 -g 16 --verbose -o cod.k63.sshash -d tmp_dir >> $1.regular.build_log - -./sshash build -i ~/sshash_datasets/kestrel.k31.unitigs.fa.ust.fa.gz -k 31 -m 20 -t 8 -g 16 --verbose -o kestrel.k31.sshash -d tmp_dir >> $1.regular.build_log -# ./sshash build -i ~/sshash_datasets/kestrel.k63.unitigs.fa.ust.fa.gz -k 63 -m 24 -t 8 -g 16 --verbose -o kestrel.k63.sshash -d tmp_dir >> $1.regular.build_log - -./sshash build -i ~/sshash_datasets/human.k31.unitigs.fa.ust.fa.gz -k 31 -m 21 -t 8 -g 16 --verbose -o human.k31.sshash -d tmp_dir >> $1.regular.build_log -# ./sshash build -i ~/sshash_datasets/human.k63.unitigs.fa.ust.fa.gz -k 63 -m 25 -t 8 -g 16 --verbose -o human.k63.sshash -d tmp_dir >> $1.regular.build_log - -### canonical indexes - -./sshash build -i ~/sshash_datasets/cod.k31.unitigs.fa.ust.fa.gz -k 31 -m 19 -t 8 -g 16 --canonical --verbose -o cod.k31.canon.sshash -d tmp_dir >> $1.canon.build_log -# ./sshash build -i ~/sshash_datasets/cod.k63.unitigs.fa.ust.fa.gz -k 63 -m 23 -t 8 -g 16 --canonical --verbose -o cod.k63.canon.sshash -d tmp_dir >> $1.canon.build_log - -./sshash build -i 
~/sshash_datasets/kestrel.k31.unitigs.fa.ust.fa.gz -k 31 -m 19 -t 8 -g 16 --canonical --verbose -o kestrel.k31.canon.sshash -d tmp_dir >> $1.canon.build_log -# ./sshash build -i ~/sshash_datasets/kestrel.k63.unitigs.fa.ust.fa.gz -k 63 -m 23 -t 8 -g 16 --canonical --verbose -o kestrel.k63.canon.sshash -d tmp_dir >> $1.canon.build_log - -./sshash build -i ~/sshash_datasets/human.k31.unitigs.fa.ust.fa.gz -k 31 -m 20 -t 8 -g 16 --canonical --verbose -o human.k31.canon.sshash -d tmp_dir >> $1.canon.build_log -# ./sshash build -i ~/sshash_datasets/human.k63.unitigs.fa.ust.fa.gz -k 63 -m 24 -t 8 -g 16 --canonical --verbose -o human.k63.canon.sshash -d tmp_dir >> $1.canon.build_log \ No newline at end of file diff --git a/script/download_and_preprocess_datasets.sh b/script/download_and_preprocess_datasets.sh deleted file mode 100644 index dbffdd7..0000000 --- a/script/download_and_preprocess_datasets.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -mkdir DNA_datasets - -wget http://ftp.ensembl.org/pub/current_fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz -O DNA_datasets/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz -~/bcalm/build/bcalm -in ~/DNA_datasets/Homo_sapiens.GRCh38.dna.chromosome.13.fa.gz -kmer-size 31 -abundance-min 1 -nb-cores 64 -~/UST/ust -k 31 -i ~/Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa -gzip Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa.ust.fa -mv Homo_sapiens.GRCh38.dna.chromosome.13.fa.unitigs.fa.ust.fa.gz DNA_datasets/ - -wget http://ftp.ensembl.org/pub/current_fasta/gadus_morhua/dna/Gadus_morhua.gadMor3.0.dna.toplevel.fa.gz -O DNA_datasets/Gadus_morhua.gadMor3.0.dna.toplevel.fa.gz -~/bcalm/build/bcalm -in ~/DNA_datasets/Gadus_morhua.gadMor3.0.dna.toplevel.fa.gz -kmer-size 31 -abundance-min 1 -nb-cores 64 -~/UST/ust -k 31 -i ~/Gadus_morhua.gadMor3.0.dna.toplevel.fa.unitigs.fa -gzip Gadus_morhua.gadMor3.0.dna.toplevel.fa.unitigs.fa.ust.fa -mv Gadus_morhua.gadMor3.0.dna.toplevel.fa.unitigs.fa.ust.fa.gz DNA_datasets/ - 
-wget http://ftp.ensembl.org/pub/current_fasta/falco_tinnunculus/dna/Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.gz -O DNA_datasets/Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.gz -~/bcalm/build/bcalm -in ~/DNA_datasets/Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.gz -kmer-size 31 -abundance-min 1 -nb-cores 64 -~/UST/ust -k 31 -i ~/Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.unitigs.fa -gzip Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.unitigs.fa.ust.fa -mv Falco_tinnunculus.FalTin1.0.dna.toplevel.fa.unitigs.fa.ust.fa.gz DNA_datasets/ - -wget http://ftp.ensembl.org/pub/current_fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.toplevel.fa.gz -O DNA_datasets/Homo_sapiens.GRCh38.dna.toplevel.fa.gz -~/bcalm/build/bcalm -in ~/DNA_datasets/Homo_sapiens.GRCh38.dna.toplevel.fa.gz -kmer-size 31 -abundance-min 1 -nb-cores 64 -~/UST/ust -k 31 -i ~/Homo_sapiens.GRCh38.dna.toplevel.fa.unitigs.fa -gzip Homo_sapiens.GRCh38.dna.toplevel.fa.unitigs.fa.ust.fa -mv Homo_sapiens.GRCh38.dna.toplevel.fa.unitigs.fa.ust.fa.gz DNA_datasets/ - -wget https://zenodo.org/record/995689/files/bacterial.genome.fixed.fa -O DNA_datasets/bacterial.genome.fixed.fa -~/bcalm/build/bcalm -in ~/DNA_datasets/bacterial.genome.fixed.fa -kmer-size 31 -abundance-min 1 -nb-cores 64 -~/UST/ust -k 31 -i ~/bacterial.genome.fixed.fa -gzip bacterial.genome.fixed.fa.unitigs.fa.ust.fa -mv bacterial.genome.fixed.fa.unitigs.fa.ust.fa.gz DNA_datasets/ diff --git a/script/streaming-query-high-hit.py b/script/streaming-query-high-hit.py new file mode 100644 index 0000000..25008d5 --- /dev/null +++ b/script/streaming-query-high-hit.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +import os +import subprocess +import sys +from pathlib import Path + +# ------------------------------ +# Argument parsing +# ------------------------------ +if len(sys.argv) < 4: + print("Usage: python3 streaming-query-high-hit.py ") + sys.exit(1) + +log_label = sys.argv[1] +index_dir = Path(sys.argv[2]).resolve() +query_dir = Path(sys.argv[3]).resolve() + 
+# ------------------------------ +# Global configuration +# ------------------------------ +results_dir = Path(f"results-{log_label}") + +datasets = [ + "cod", "kestrel", "human", "ncbi-virus", "se", "hprc" +] + +queries = {"cod":"SRR12858649", "kestrel":"SRR11449743_1", "human":"SRR5833294", "ncbi-virus":"ncbi-queries", "se":"SRR27871075_1", "hprc":"SRR5833294"} + +# ------------------------------ +# Utility functions +# ------------------------------ +def run_cmd(cmd, cwd=None): + """Run a shell command and print it.""" + print(f"[RUN] {' '.join(cmd)}") + subprocess.run(cmd, cwd=cwd, check=True) + + +def build_project(max_k63: bool): + """Run cmake + make with max_k63 = True/False.""" + flag = "On" if max_k63 else "Off" + print(f"\n=== Building SSHASH (MAX_KMER_LENGTH_63={flag}) ===\n") + run_cmd([ + "cmake", "..", + "-DCMAKE_BUILD_TYPE=Release", + "-DCMAKE_CXX_COMPILER=/usr/bin/g++", + "-DSSHASH_USE_ARCH_NATIVE=On", + "-DSSHASH_USE_SANITIZERS=Off", + f"-DSSHASH_USE_MAX_KMER_LENGTH_63={flag}" + ]) + run_cmd(["make", "-j"]) + + +def run_bench(k, canonical, runs = 1): + """Run SSHASH benchmark for all datasets.""" + mode = "canon" if canonical else "regular" + out_dir = results_dir / f"k{k}" + out_dir.mkdir(parents=True, exist_ok=True) + log_file = out_dir / f"{mode}-streaming-queries-high-hit.log" + json_file = out_dir / f"{mode}-streaming-queries-high-hit.json" + + for dataset in datasets: + suffix = f".k{k}.canon.sshash" if canonical else f".k{k}.sshash" + index_path = index_dir / f"{dataset}{suffix}" + + print(f"\n>>> Benchmarking {dataset} (k={k}, mode={mode})\n") + for i in range(runs): + print(f" ==> run {i+1}/{runs}") + cmd = ["./sshash", "query", "-i", str(index_path), "-q", str(query_dir) + "/" + queries[dataset] + ".fastq.gz"] + # Append stdout to .log, stderr to .json + with open(log_file, "a") as log, open(json_file, "a") as js: + subprocess.run(cmd, stdout=log, stderr=js, check=True) + +# ------------------------------ +# Prepare directories +# 
------------------------------ +results_dir.mkdir(parents=True, exist_ok=True) +(index_dir).mkdir(parents=True, exist_ok=True) + +(results_dir / "k31").mkdir(exist_ok=True) +(results_dir / "k63").mkdir(exist_ok=True) + +# ------------------------------ +# Run benchmarks +# ------------------------------ + +# --- Build for k=31 --- +build_project(max_k63=False) +run_bench(31, False) +run_bench(31, True) + +# --- Build for k=63 --- +build_project(max_k63=True) +run_bench(63, False) +run_bench(63, True) + +# --- Restore to default --- +build_project(max_k63=False) + +print("\n All SSHash benchmark runs completed successfully. \n") diff --git a/script/streaming-query-high-hit.sh b/script/streaming-query-high-hit.sh deleted file mode 100644 index 07eb4fd..0000000 --- a/script/streaming-query-high-hit.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -echo "output log file =" $1 - -### regular indexes - -./sshash query -i cod.k31.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.regular.high-hit.streaming_query_log -# ./sshash query -i cod.k63.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.regular.high-hit.streaming_query_log - -./sshash query -i kestrel.k31.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.regular.high-hit.streaming_query_log -# ./sshash query -i kestrel.k63.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.regular.high-hit.streaming_query_log - -./sshash query -i human.k31.sshash -q ~/sshash_queries/SRR5833294.fastq.gz >> $1.regular.high-hit.streaming_query_log -# ./sshash query -i human.k63.sshash -q ~/sshash_queries/SRR5833294.fastq.gz >> $1.regular.high-hit.streaming_query_log - -### canonical indexes - -./sshash query -i cod.k31.canon.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.canon.high-hit.streaming_query_log -# ./sshash query -i cod.k63.canon.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.canon.high-hit.streaming_query_log - -./sshash query -i kestrel.k31.canon.sshash -q 
~/sshash_queries/SRR11449743_1.fastq.gz >> $1.canon.high-hit.streaming_query_log -# ./sshash query -i kestrel.k63.canon.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.canon.high-hit.streaming_query_log - -./sshash query -i human.k31.canon.sshash -q ~/sshash_queries/SRR5833294.fastq.gz >> $1.canon.high-hit.streaming_query_log -# ./sshash query -i human.k63.canon.sshash -q ~/sshash_queries/SRR5833294.fastq.gz >> $1.canon.high-hit.streaming_query_log diff --git a/script/streaming-query-low-hit.sh b/script/streaming-query-low-hit.sh deleted file mode 100644 index 1ee0e52..0000000 --- a/script/streaming-query-low-hit.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -echo "output log file =" $1 - -### regular indexes - -./sshash query -i cod.k31.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.regular.low-hit.streaming_query_log -# ./sshash query -i cod.k63.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.regular.low-hit.streaming_query_log - -./sshash query -i kestrel.k31.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.regular.low-hit.streaming_query_log -# ./sshash query -i kestrel.k63.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.regular.low-hit.streaming_query_log - -./sshash query -i human.k31.sshash -q ~/sshash_queries/SRR5901135_1.fastq.gz >> $1.regular.low-hit.streaming_query_log -# ./sshash query -i human.k63.sshash -q ~/sshash_queries/SRR5901135_1.fastq.gz >> $1.regular.low-hit.streaming_query_log - -### canonical indexes - -./sshash query -i cod.k31.canon.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.canon.low-hit.streaming_query_log -# ./sshash query -i cod.k63.canon.sshash -q ~/sshash_queries/SRR11449743_1.fastq.gz >> $1.canon.low-hit.streaming_query_log - -./sshash query -i kestrel.k31.canon.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.canon.low-hit.streaming_query_log -# ./sshash query -i kestrel.k63.canon.sshash -q ~/sshash_queries/SRR12858649.fastq.gz >> $1.canon.low-hit.streaming_query_log - 
-./sshash query -i human.k31.canon.sshash -q ~/sshash_queries/SRR5901135_1.fastq.gz >> $1.canon.low-hit.streaming_query_log -# ./sshash query -i human.k63.canon.sshash -q ~/sshash_queries/SRR5901135_1.fastq.gz >> $1.canon.low-hit.streaming_query_log diff --git a/src/build.cpp b/src/build.cpp index 117f2d7..4962bc1 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -1,188 +1,30 @@ -#include "include/dictionary.hpp" -#include "essentials.hpp" -#include "include/builder/util.hpp" +#include "include/builder/dictionary_builder.hpp" -#include "include/builder/parse_file.hpp" -#include "include/builder/build_sparse_index.hpp" -#include "include/builder/build_skew_index.hpp" - -#include // for std::accumulate +#include "include/builder/parse_file.cpp" +#include "include/builder/build_sparse_and_skew_index.cpp" +#include "include/builder/compute_minimizer_tuples.cpp" namespace sshash { -template -void dictionary::build(std::string const& filename, - build_configuration const& build_config) { +template +void dictionary::build(std::string const& filename, + build_configuration const& build_config) // +{ /* Validate the build configuration. 
*/ if (build_config.k == 0) throw std::runtime_error("k must be > 0"); - if (build_config.k > kmer_t::max_k) { - throw std::runtime_error("k must be less <= " + std::to_string(kmer_t::max_k) + + if (build_config.k > Kmer::max_k) { + throw std::runtime_error("k must be less <= " + std::to_string(Kmer::max_k) + " but got k = " + std::to_string(build_config.k)); } if (build_config.m == 0) throw std::runtime_error("m must be > 0"); - if (build_config.m > kmer_t::max_m) { - throw std::runtime_error("m must be less <= " + std::to_string(kmer_t::max_m) + + if (build_config.m > Kmer::max_m) { + throw std::runtime_error("m must be less <= " + std::to_string(Kmer::max_m) + " but got m = " + std::to_string(build_config.m)); } if (build_config.m > build_config.k) throw std::runtime_error("m must be <= k"); - if (build_config.l >= constants::max_l) { - throw std::runtime_error("l must be < " + std::to_string(constants::max_l)); - } - - m_k = build_config.k; - m_m = build_config.m; - m_canonical = build_config.canonical; - m_skew_index.min_log2 = build_config.l; - m_hasher.seed(build_config.seed); - - std::vector timings; - timings.reserve(7); - essentials::timer_type timer; - - /* step 1: parse the input file, encode sequences (1.1), and compute minimizer tuples (1.2) ***/ - timer.start(); - parse_data data(build_config); - parse_file(filename, data, build_config); - m_size = data.num_kmers; - if (build_config.weighted) { - essentials::timer_type timer; - timer.start(); - data.weights_builder.build(m_weights); - timer.stop(); - print_time(timer.elapsed(), data.num_kmers, "step 1.3: 'build_weights'"); - if (build_config.verbose) { - double entropy_weights = data.weights_builder.print_info(data.num_kmers); - double avg_bits_per_weight = static_cast(m_weights.num_bits()) / data.num_kmers; - std::cout << "weights: " << avg_bits_per_weight << " [bits/kmer]" << std::endl; - std::cout << " (" << entropy_weights / avg_bits_per_weight - << "x smaller than the empirical entropy)" << 
std::endl; - } - } - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 1: 'parse_file'"); - timer.reset(); - /******/ - - /* step 2: merge minimizer tuples and build MPHF ***/ - { - timer.start(); - data.minimizers.merge(); - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 2.1: 'merging_minimizers_tuples'"); - - timer.reset(); - - timer.start(); - const uint64_t num_minimizers = data.minimizers.num_minimizers(); - mm::file_source input(data.minimizers.get_minimizers_filename(), - mm::advice::sequential); - minimizers_tuples_iterator iterator(input.data(), input.data() + input.size()); - m_minimizers.build(iterator, num_minimizers, build_config); - input.close(); - assert(m_minimizers.size() == num_minimizers); - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 2.2: 'build_minimizers_mphf'"); - - timer.reset(); - } - - { - if (build_config.verbose) std::cout << "re-sorting minimizer tuples..." << std::endl; - - timer.start(); - - std::string filename = data.minimizers.get_minimizers_filename(); - std::ifstream input(filename, std::ifstream::binary); - - auto const& f = m_minimizers; - const uint64_t num_threads = build_config.num_threads; - const uint64_t num_files_to_merge = data.minimizers.num_files_to_merge(); - - data.minimizers.init(); - - const uint64_t num_super_kmers = data.minimizers.num_super_kmers(); - const uint64_t buffer_size = num_files_to_merge == 1 - ? num_super_kmers - : ((build_config.ram_limit_in_GiB * essentials::GiB) / - (2 * sizeof(minimizer_tuple))); - const uint64_t num_blocks = (num_super_kmers + buffer_size - 1) / buffer_size; - assert(num_super_kmers > (num_blocks - 1) * buffer_size); - - std::vector threads; - threads.reserve(num_threads); - - std::vector buffer; - for (uint64_t i = 0; i != num_blocks; ++i) { - const uint64_t n = (i == num_blocks - 1) - ? 
num_super_kmers - (num_blocks - 1) * buffer_size - : buffer_size; - buffer.resize(n); - input.read(reinterpret_cast(buffer.data()), - buffer.size() * sizeof(minimizer_tuple)); - const uint64_t chunk_size = (n + num_threads - 1) / num_threads; - for (uint64_t t = 0; t * chunk_size < n; ++t) { - uint64_t begin = t * chunk_size; - uint64_t end = std::min(n, begin + chunk_size); - threads.emplace_back([begin, end, &buffer, &f]() { - for (uint64_t i = begin; i < end; ++i) { - buffer[i].minimizer = f.lookup(buffer[i].minimizer); - } - }); - } - for (auto& t : threads) { - if (t.joinable()) t.join(); - } - threads.clear(); - data.minimizers.sort_and_flush(buffer); - } - assert(buffer.empty()); - - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, - "step 2.3: 'replacing minimizer values with MPHF hashes'"); - timer.reset(); - - timer.start(); - data.minimizers.merge(); - input.close(); - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 2.4: 'merging_minimizers_tuples '"); - timer.reset(); - } - /******/ - - /* step 3: build sparse index ***/ - timer.start(); - auto buckets_stats = build_sparse_index(data, m_buckets, build_config); - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 3: 'build_sparse_index'"); - timer.reset(); - /******/ - - /* step 4: build skew index ***/ - timer.start(); - build_skew_index(m_skew_index, data, m_buckets, build_config, buckets_stats); - timer.stop(); - timings.push_back(timer.elapsed()); - print_time(timings.back(), data.num_kmers, "step 4: 'build_skew_index'"); - timer.reset(); - /******/ - - assert(timings.size() == 7); - double total_time = std::accumulate(timings.begin(), timings.end(), 0.0); - print_time(total_time, data.num_kmers, "total_time"); - - print_space_breakdown(); - - if (build_config.verbose) buckets_stats.print_less(); - data.minimizers.remove_tmp_file(); + 
dictionary_builder builder(build_config); + builder.build(*this, filename); } } // namespace sshash diff --git a/src/dictionary.cpp b/src/dictionary.cpp index f2cb996..d0838c9 100644 --- a/src/dictionary.cpp +++ b/src/dictionary.cpp @@ -1,252 +1,210 @@ #include "include/dictionary.hpp" +#include "include/builder/util.hpp" + namespace sshash { -template -lookup_result dictionary::lookup_uint_regular(kmer_t uint_kmer) const { +template +lookup_result dictionary::lookup_regular(const Kmer uint_kmer) const { auto mini_info = util::compute_minimizer(uint_kmer, m_k, m_m, m_hasher); - return lookup_uint_regular(uint_kmer, mini_info); + return lookup_regular(uint_kmer, mini_info); } -template -lookup_result dictionary::lookup_uint_regular(kmer_t uint_kmer, // - minimizer_info mini_info) const // +template +lookup_result dictionary::lookup_regular(const Kmer uint_kmer, // + const minimizer_info mini_info) const // { assert(minimizer_info(mini_info.minimizer, mini_info.pos_in_kmer) == util::compute_minimizer(uint_kmer, m_k, m_m, m_hasher)); - const uint64_t bucket_id = m_minimizers.lookup(mini_info.minimizer); - const auto [begin, end] = m_buckets.locate_bucket(bucket_id); - - if (m_skew_index.empty()) { - return m_buckets.lookup(begin, end, uint_kmer, mini_info, m_k, m_m); - } - - const uint64_t bucket_size = end - begin; - const uint64_t log2_bucket_size = bits::util::ceil_log2_uint32(bucket_size); - if (log2_bucket_size > m_skew_index.min_log2) { - uint64_t pos_in_bucket = m_skew_index.lookup(uint_kmer, log2_bucket_size); - /* It must hold pos_in_bucket < bucket_size for the kmer to exist. 
*/ - if (pos_in_bucket < bucket_size) { - return m_buckets.lookup(begin + pos_in_bucket, uint_kmer, mini_info, m_k, m_m); - } - return lookup_result(); - } - - return m_buckets.lookup(begin, end, uint_kmer, mini_info, m_k, m_m); + auto it = m_ssi.lookup(uint_kmer, mini_info); + return m_spss.lookup_regular(it, uint_kmer, mini_info); } -template -lookup_result dictionary::lookup_uint_canonical(kmer_t uint_kmer) const // +template +lookup_result dictionary::lookup_canonical(Kmer uint_kmer) const // { - kmer_t uint_kmer_rc = uint_kmer; + Kmer uint_kmer_rc = uint_kmer; uint_kmer_rc.reverse_complement_inplace(m_k); auto mini_info = util::compute_minimizer(uint_kmer, m_k, m_m, m_hasher); auto mini_info_rc = util::compute_minimizer(uint_kmer_rc, m_k, m_m, m_hasher); if (mini_info.minimizer < mini_info_rc.minimizer) { - return lookup_uint_canonical(uint_kmer, uint_kmer_rc, mini_info); + return lookup_canonical(uint_kmer, uint_kmer_rc, mini_info); } else if (mini_info_rc.minimizer < mini_info.minimizer) { - return lookup_uint_canonical(uint_kmer, uint_kmer_rc, mini_info_rc); + return lookup_canonical(uint_kmer, uint_kmer_rc, mini_info_rc); } else { - auto res = lookup_uint_canonical(uint_kmer, uint_kmer_rc, mini_info); + auto res = lookup_canonical(uint_kmer, uint_kmer_rc, mini_info); if (res.kmer_id == constants::invalid_uint64) { - res = lookup_uint_canonical(uint_kmer, uint_kmer_rc, mini_info_rc); + res = lookup_canonical(uint_kmer, uint_kmer_rc, mini_info_rc); } return res; } } -template -lookup_result dictionary::lookup_uint_canonical(kmer_t uint_kmer, kmer_t uint_kmer_rc, - minimizer_info mini_info) const // +template +lookup_result dictionary::lookup_canonical(const Kmer uint_kmer, // + const Kmer uint_kmer_rc, // + const minimizer_info mini_info) const // { assert(mini_info.minimizer == std::min(util::compute_minimizer(uint_kmer, m_k, m_m, m_hasher).minimizer, util::compute_minimizer(uint_kmer_rc, m_k, m_m, m_hasher).minimizer)); - const uint64_t bucket_id = 
m_minimizers.lookup(mini_info.minimizer); - const auto [begin, end] = m_buckets.locate_bucket(bucket_id); - - if (m_skew_index.empty()) { - return m_buckets.lookup_canonical(begin, end, uint_kmer, uint_kmer_rc, mini_info, m_k, m_m); - } - - const uint64_t bucket_size = end - begin; - const uint64_t log2_bucket_size = bits::util::ceil_log2_uint32(bucket_size); - if (log2_bucket_size > m_skew_index.min_log2) { - auto uint_kmer_canon = std::min(uint_kmer, uint_kmer_rc); - uint64_t pos_in_bucket = m_skew_index.lookup(uint_kmer_canon, log2_bucket_size); - if (pos_in_bucket < bucket_size) { - auto res = m_buckets.lookup_canonical(begin + pos_in_bucket, uint_kmer, uint_kmer_rc, - mini_info, m_k, m_m); - if (res.kmer_id != constants::invalid_uint64) return res; - } - return lookup_result(); - } - - return m_buckets.lookup_canonical(begin, end, uint_kmer, uint_kmer_rc, mini_info, m_k, m_m); + const Kmer uint_kmer_canon = std::min(uint_kmer, uint_kmer_rc); + auto it = m_ssi.lookup(uint_kmer_canon, mini_info); + return m_spss.lookup_canonical(it, uint_kmer, uint_kmer_rc, mini_info); } -template -uint64_t dictionary::lookup(char const* string_kmer, bool check_reverse_complement) const { - kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); - return lookup_uint(uint_kmer, check_reverse_complement); -} -template -uint64_t dictionary::lookup_uint(kmer_t uint_kmer, bool check_reverse_complement) const { - auto res = lookup_advanced_uint(uint_kmer, check_reverse_complement); - return res.kmer_id; +template +lookup_result dictionary::lookup(char const* string_kmer, + bool check_reverse_complement) const { + Kmer uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); + return lookup(uint_kmer, check_reverse_complement); } - -template -lookup_result dictionary::lookup_advanced(char const* string_kmer, - bool check_reverse_complement) const { - kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); - return lookup_advanced_uint(uint_kmer, 
check_reverse_complement); -} -template -lookup_result dictionary::lookup_advanced_uint(kmer_t uint_kmer, - bool check_reverse_complement) const { - if (m_canonical) return lookup_uint_canonical(uint_kmer); - auto res = lookup_uint_regular(uint_kmer); +template +lookup_result dictionary::lookup(Kmer uint_kmer, + bool check_reverse_complement) const // +{ + if (m_canonical) return lookup_canonical(uint_kmer); + auto res = lookup_regular(uint_kmer); assert(res.kmer_orientation == constants::forward_orientation); if (check_reverse_complement and res.kmer_id == constants::invalid_uint64) { - kmer_t uint_kmer_rc = uint_kmer; + Kmer uint_kmer_rc = uint_kmer; uint_kmer_rc.reverse_complement_inplace(m_k); - res = lookup_uint_regular(uint_kmer_rc); + res = lookup_regular(uint_kmer_rc); res.kmer_orientation = constants::backward_orientation; } return res; } -template -bool dictionary::is_member(char const* string_kmer, bool check_reverse_complement) const { +template +bool dictionary::is_member(char const* string_kmer, + bool check_reverse_complement) const { return lookup(string_kmer, check_reverse_complement) != constants::invalid_uint64; } -template -bool dictionary::is_member_uint(kmer_t uint_kmer, bool check_reverse_complement) const { - return lookup_uint(uint_kmer, check_reverse_complement) != constants::invalid_uint64; +template +bool dictionary::is_member(Kmer uint_kmer, bool check_reverse_complement) const { + return lookup(uint_kmer, check_reverse_complement) != constants::invalid_uint64; } -template -void dictionary::access(uint64_t kmer_id, char* string_kmer) const { - assert(kmer_id < size()); - m_buckets.access(kmer_id, string_kmer, m_k); +template +void dictionary::access(uint64_t kmer_id, char* string_kmer) const { + assert(kmer_id < num_kmers()); + m_spss.access(kmer_id, string_kmer); } -template -uint64_t dictionary::weight(uint64_t kmer_id) const { - assert(kmer_id < size()); +template +uint64_t dictionary::weight(uint64_t kmer_id) const { + 
assert(kmer_id < num_kmers()); return m_weights.weight(kmer_id); } -template -uint64_t dictionary::contig_size(uint64_t contig_id) const { - assert(contig_id < num_contigs()); - auto [begin, end] = m_buckets.contig_offsets(contig_id); - uint64_t contig_length = end - begin; - assert(contig_length >= m_k); - return contig_length - m_k + 1; +template +uint64_t dictionary::string_size(uint64_t string_id) const { + assert(string_id < num_strings()); + auto [begin, end] = m_spss.string_offsets(string_id); + uint64_t string_length = end - begin; + assert(string_length >= m_k); + return string_length - m_k + 1; } -template -void dictionary::forward_neighbours(kmer_t suffix, neighbourhood& res, - bool check_reverse_complement) const { - for (size_t i = 0; i < kmer_t::alphabet_size; i++) { - kmer_t new_kmer = suffix; - new_kmer.set(m_k - 1, kmer_t::char_to_uint(kmer_t::alphabet[i])); - res.forward[i] = lookup_advanced_uint(new_kmer, check_reverse_complement); +template +void dictionary::forward_neighbours(Kmer suffix, neighbourhood& res, + bool check_reverse_complement) const { + for (size_t i = 0; i < Kmer::alphabet_size; i++) { + Kmer new_kmer = suffix; + new_kmer.set(m_k - 1, Kmer::char_to_uint(Kmer::alphabet[i])); + res.forward[i] = lookup(new_kmer, check_reverse_complement); } } -template -void dictionary::backward_neighbours(kmer_t prefix, neighbourhood& res, - bool check_reverse_complement) const { - for (size_t i = 0; i < kmer_t::alphabet_size; i++) { - kmer_t new_kmer = prefix; - new_kmer.set(0, kmer_t::char_to_uint(kmer_t::alphabet[i])); - res.backward[i] = lookup_advanced_uint(new_kmer, check_reverse_complement); +template +void dictionary::backward_neighbours(Kmer prefix, neighbourhood& res, + bool check_reverse_complement) const { + for (size_t i = 0; i < Kmer::alphabet_size; i++) { + Kmer new_kmer = prefix; + new_kmer.set(0, Kmer::char_to_uint(Kmer::alphabet[i])); + res.backward[i] = lookup(new_kmer, check_reverse_complement); } } -template -neighbourhood 
dictionary::kmer_forward_neighbours( +template +neighbourhood dictionary::kmer_forward_neighbours( char const* string_kmer, bool check_reverse_complement) const { - kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); + Kmer uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); return kmer_forward_neighbours(uint_kmer, check_reverse_complement); } -template -kmer_t dictionary::get_suffix(kmer_t kmer) const { - kmer_t suffix = kmer; +template +Kmer dictionary::get_suffix(Kmer kmer) const { + Kmer suffix = kmer; suffix.drop_char(); return suffix; } -template -neighbourhood dictionary::kmer_forward_neighbours( - kmer_t uint_kmer, bool check_reverse_complement) const { - neighbourhood res; +template +neighbourhood dictionary::kmer_forward_neighbours( + Kmer uint_kmer, bool check_reverse_complement) const { + neighbourhood res; forward_neighbours(get_suffix(uint_kmer), res, check_reverse_complement); return res; } -template -neighbourhood dictionary::kmer_backward_neighbours( +template +neighbourhood dictionary::kmer_backward_neighbours( char const* string_kmer, bool check_reverse_complement) const { - kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); + Kmer uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); return kmer_backward_neighbours(uint_kmer, check_reverse_complement); } -template -kmer_t dictionary::get_prefix(kmer_t kmer) const { - kmer_t prefix = kmer; +template +Kmer dictionary::get_prefix(Kmer kmer) const { + Kmer prefix = kmer; prefix.pad_char(); prefix.take_chars(m_k); return prefix; } -template -neighbourhood dictionary::kmer_backward_neighbours( - kmer_t uint_kmer, bool check_reverse_complement) const { - neighbourhood res; +template +neighbourhood dictionary::kmer_backward_neighbours( + Kmer uint_kmer, bool check_reverse_complement) const { + neighbourhood res; backward_neighbours(get_prefix(uint_kmer), res, check_reverse_complement); return res; } -template -neighbourhood dictionary::kmer_neighbours(char const* 
string_kmer, - bool check_reverse_complement) const { - kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); +template +neighbourhood dictionary::kmer_neighbours( + char const* string_kmer, bool check_reverse_complement) const { + Kmer uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); return kmer_neighbours(uint_kmer, check_reverse_complement); } -template -neighbourhood dictionary::kmer_neighbours(kmer_t uint_kmer, - bool check_reverse_complement) const { - neighbourhood res; +template +neighbourhood dictionary::kmer_neighbours( + Kmer uint_kmer, bool check_reverse_complement) const { + neighbourhood res; forward_neighbours(get_suffix(uint_kmer), res, check_reverse_complement); backward_neighbours(get_prefix(uint_kmer), res, check_reverse_complement); return res; } -template -neighbourhood dictionary::contig_neighbours(uint64_t contig_id, - bool check_reverse_complement) const { - assert(contig_id < num_contigs()); - neighbourhood res; - kmer_t suffix = m_buckets.contig_suffix(contig_id, m_k); +template +neighbourhood dictionary::string_neighbours( + uint64_t string_id, bool check_reverse_complement) const { + assert(string_id < num_strings()); + neighbourhood res; + Kmer suffix = m_spss.string_suffix(string_id); forward_neighbours(suffix, res, check_reverse_complement); - kmer_t prefix = m_buckets.contig_prefix(contig_id, m_k); + Kmer prefix = m_spss.string_prefix(string_id); prefix.pad_char(); backward_neighbours(prefix, res, check_reverse_complement); return res; } -template -uint64_t dictionary::num_bits() const { - return 8 * (sizeof(m_vnum) + sizeof(m_size) + sizeof(m_hasher) + sizeof(m_k) + sizeof(m_m) + - sizeof(m_canonical)) + - m_minimizers.num_bits() + m_buckets.num_bits() + m_skew_index.num_bits() + - m_weights.num_bits(); +template +uint64_t dictionary::num_bits() const { + return 8 * (sizeof(m_vnum) + sizeof(m_num_kmers) + sizeof(m_num_strings) + sizeof(m_k) + + sizeof(m_m) + sizeof(m_canonical) + sizeof(m_hasher)) + + 
m_spss.num_bits() + m_ssi.num_bits() + m_weights.num_bits(); } } // namespace sshash diff --git a/src/info.cpp b/src/info.cpp index 27ccdf9..cf214fc 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -2,95 +2,61 @@ namespace sshash { -[[maybe_unused]] -static inline double bits_per_kmer_formula(uint64_t k, /* kmer length */ - uint64_t m, /* minimizer length */ - uint64_t n, /* num. kmers */ - uint64_t M) /* num. strings in SPSS */ -{ - /* - Caveats: - 1. we assume an alphabet of size 4 - 2. this assumes a random minimizer scheme, so num. super-kmers is ~ 2n/(k-m+2) - 3. we neglect lower order terms and skew index space - 4. not canonical - */ - - assert(k > 0); - assert(k >= m); - - const uint64_t N = n + M * (k - 1); // num. characters in SPSS - - /* summing (M-1) provides an upper bound to the num. of super-kmers */ - double Z = (2.0 * n) / (k - m + 2) + (M - 1); - - /* A cache line is 64 B = 512 bits --> - max window_size that fits in a cache line is 512/2 = 256 - assuming a 2-bit encoded stream. 
*/ - const uint64_t window_size = 1; /* 256; */ - - double num_bits = - 2 * N + Z * (5.0 + std::ceil(std::log2(std::ceil(static_cast(N) / window_size)))) + - M * (2.0 + std::ceil(std::log2(static_cast(N) / M))); - - return num_bits / n; -} +template +void dictionary::print_space_breakdown() const { + const uint64_t num_bytes = (num_bits() + 7) / 8; -inline double perc(uint64_t amount, uint64_t total) { return (amount * 100.0) / total; } + auto perc = [](uint64_t amount, uint64_t total) -> double { return (amount * 100.0) / total; }; -template -void dictionary::print_space_breakdown() const { - const uint64_t num_bytes = (num_bits() + 7) / 8; std::cout << "total index size: " << num_bytes << " [B] -- " << essentials::convert(num_bytes, essentials::MB) << " [MB]" << '\n'; std::cout << "SPACE BREAKDOWN:\n"; - std::cout << " minimizers: " << static_cast(m_minimizers.num_bits()) / size() + std::cout << " mphf: " << static_cast(m_ssi.codewords.mphf.num_bits()) / num_kmers() << " [bits/kmer] (" - << static_cast(m_minimizers.num_bits()) / m_minimizers.size() - << " [bits/key]) -- " << perc(m_minimizers.num_bits(), num_bits()) << "%\n"; - std::cout << " pieces: " << (8.0 * m_buckets.pieces.num_bytes()) / size() << " [bits/kmer] -- " - << perc(m_buckets.pieces.num_bytes() * 8, num_bits()) << "%\n"; - std::cout << " sizes: " << (m_buckets.bucket_sizes.num_bytes() * 8.0) / size() - << " [bits/kmer] -- " << perc(m_buckets.bucket_sizes.num_bytes() * 8, num_bits()) + << static_cast(m_ssi.codewords.mphf.num_bits()) / + m_ssi.codewords.mphf.num_keys() + << " [bits/key]) -- " << perc(m_ssi.codewords.mphf.num_bits(), num_bits()) << "%\n"; + std::cout << " strings_offsets: " << (8.0 * m_spss.strings_offsets.num_bytes()) / num_kmers() + << " [bits/kmer] -- " << perc(m_spss.strings_offsets.num_bytes() * 8, num_bits()) + << "%\n"; + + std::cout << " control_codewords: " + << (8.0 * m_ssi.codewords.control_codewords.num_bytes()) / num_kmers() + << " [bits/kmer] -- " + << perc(8 * 
m_ssi.codewords.control_codewords.num_bytes(), num_bits()) << "%\n"; + std::cout << " mid_load_buckets: " << (8.0 * m_ssi.mid_load_buckets.num_bytes()) / num_kmers() + << " [bits/kmer] -- " << perc(8 * m_ssi.mid_load_buckets.num_bytes(), num_bits()) << "%\n"; - std::cout << " offsets: " << (8.0 * m_buckets.offsets.num_bytes()) / size() - << " [bits/kmer] -- " << perc(8 * m_buckets.offsets.num_bytes(), num_bits()) << "%\n"; - std::cout << " strings: " << (8.0 * m_buckets.strings.num_bytes()) / size() - << " [bits/kmer] -- " << perc(8 * m_buckets.strings.num_bytes(), num_bits()) << "%\n"; - std::cout << " skew_index: " << static_cast(m_skew_index.num_bits()) / size() - << " [bits/kmer] -- " << perc(m_skew_index.num_bits(), num_bits()) << "%\n"; - std::cout << " weights: " << static_cast(m_weights.num_bits()) / size() + std::cout << " begin_buckets_of_size: " + << (8.0 * essentials::vec_bytes(m_ssi.begin_buckets_of_size)) / num_kmers() + << " [bits/kmer] -- " + << perc(8 * essentials::vec_bytes(m_ssi.begin_buckets_of_size), num_bits()) << "%\n"; + + std::cout << " strings: " << (8.0 * m_spss.strings.num_bytes()) / num_kmers() + << " [bits/kmer] -- " << perc(8 * m_spss.strings.num_bytes(), num_bits()) << "%\n"; + std::cout << " skew_index: " << static_cast(m_ssi.ski.num_bits()) / num_kmers() + << " [bits/kmer] -- " << perc(m_ssi.ski.num_bits(), num_bits()) << "%\n"; + std::cout << " weights: " << static_cast(m_weights.num_bits()) / num_kmers() << " [bits/kmer] -- " << perc(m_weights.num_bits(), num_bits()) << "%\n"; - if (weighted()) m_weights.print_space_breakdown(size()); + + if (weighted()) m_weights.print_space_breakdown(num_kmers()); + std::cout << " --------------\n"; - std::cout << " total: " << static_cast(num_bits()) / size() << " [bits/kmer]" + std::cout << " total: " << static_cast(num_bits()) / num_kmers() << " [bits/kmer]" << std::endl; - - // std::cout << " Close-form formula: " << bits_per_kmer_formula(k(), m(), size(), - // num_contigs()) - // << " 
[bits/kmer]" << std::endl; } -template -void dictionary::print_info() const { +template +void dictionary::print_info() const { std::cout << "=== dictionary info:\n"; std::cout << "version number = " << m_vnum.to_string() << '\n'; - std::cout << "num_kmers = " << size() << '\n'; + std::cout << "num_kmers = " << num_kmers() << '\n'; + std::cout << "num_strings = " << num_strings() << '\n'; std::cout << "k = " << k() << '\n'; - std::cout << "num_minimizers = " << m_minimizers.size() << std::endl; + std::cout << "num_minimizers = " << m_ssi.codewords.size() << std::endl; std::cout << "m = " << m() << '\n'; std::cout << "canonical = " << (canonical() ? "true" : "false") << '\n'; std::cout << "weighted = " << (weighted() ? "true" : "false") << '\n'; - - std::cout << "num_super_kmers = " << m_buckets.offsets.size() << '\n'; - std::cout << "num_pieces = " << m_buckets.pieces.size() << " (+" - << (2.0 * m_buckets.pieces.size() * (k() - 1)) / size() << " [bits/kmer])" << '\n'; - std::cout << "bits_per_offset = ceil(log2(" << m_buckets.strings.num_bits() / 2 - << ")) = " << std::ceil(std::log2(m_buckets.strings.num_bits() / 2)) << '\n'; - uint64_t num_kmers_in_skew_index = m_skew_index.print_info(); - std::cout << "num_kmers_in_skew_index " << num_kmers_in_skew_index << "(" - << (num_kmers_in_skew_index * 100.0) / size() << "%)" << std::endl; - print_space_breakdown(); } diff --git a/src/query.cpp b/src/query.cpp index 06a7707..4175b1f 100644 --- a/src/query.cpp +++ b/src/query.cpp @@ -6,9 +6,10 @@ namespace sshash { -template -streaming_query_report streaming_query_from_fasta_file_multiline(dictionary const* dict, - std::istream& is) { +template +streaming_query_report streaming_query_from_fasta_file_multiline(Dict const* dict, + std::istream& is) // +{ streaming_query_report report; buffered_lines_iterator it(is); std::string buffer; @@ -17,11 +18,12 @@ streaming_query_report streaming_query_from_fasta_file_multiline(dictionary -streaming_query_report 
streaming_query_from_fasta_file(dictionary const* dict, - std::istream& is) { +template +streaming_query_report streaming_query_from_fasta_file(Dict const* dict, std::istream& is) // +{ streaming_query_report report; std::string line; const uint64_t k = dict->k(); @@ -60,7 +62,7 @@ streaming_query_report streaming_query_from_fasta_file(dictionary const* report.num_kmers += num_kmers; for (uint64_t i = 0; i != num_kmers; ++i) { char const* kmer = line.data() + i; - query.lookup_advanced(kmer); + query.lookup(kmer); } } report.num_searches = query.num_searches(); @@ -73,9 +75,8 @@ streaming_query_report streaming_query_from_fasta_file(dictionary const* return report; } -template -streaming_query_report streaming_query_from_fastq_file(dictionary const* dict, - std::istream& is) { +template +streaming_query_report streaming_query_from_fastq_file(Dict const* dict, std::istream& is) { streaming_query_report report; std::string line; const uint64_t k = dict->k(); @@ -88,9 +89,9 @@ streaming_query_report streaming_query_from_fastq_file(dictionary const* if (line.size() >= k) { const uint64_t num_kmers = line.size() - k + 1; report.num_kmers += num_kmers; - for (uint64_t i = 0; i != line.size() - k + 1; ++i) { + for (uint64_t i = 0; i != num_kmers; ++i) { char const* kmer = line.data() + i; - query.lookup_advanced(kmer); + query.lookup(kmer); } } std::getline(is, line); // skip '+' @@ -106,29 +107,35 @@ streaming_query_report streaming_query_from_fastq_file(dictionary const* return report; } -template -streaming_query_report streaming_query_from_fasta_file(dictionary const* dict, - std::istream& is, bool multiline) { - if (multiline) return streaming_query_from_fasta_file_multiline(dict, is); - return streaming_query_from_fasta_file(dict, is); +template +streaming_query_report streaming_query_from_fasta_file(Dict const* dict, std::istream& is, + bool multiline) // +{ + if (multiline) return streaming_query_from_fasta_file_multiline(dict, is); + return 
streaming_query_from_fasta_file(dict, is); } -template -streaming_query_report dictionary::streaming_query_from_file(std::string const& filename, - bool multiline) const { +template +streaming_query_report // +dictionary::streaming_query_from_file(std::string const& filename, + bool multiline) const // +{ + using dictionary_type = dictionary; + using regular_query = streaming_query; + using canonical_query = streaming_query; + std::ifstream is(filename.c_str()); if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); streaming_query_report report; if (util::ends_with(filename, ".fa.gz") or util::ends_with(filename, ".fasta.gz")) { zip_istream zis(is); - if (canonical()) { - report = streaming_query_from_fasta_file>( - this, zis, multiline); + report = streaming_query_from_fasta_file(this, zis, + multiline); } else { - report = streaming_query_from_fasta_file>( - this, zis, multiline); + report = streaming_query_from_fasta_file(this, zis, + multiline); } } else if (util::ends_with(filename, ".fq.gz") or util::ends_with(filename, ".fastq.gz")) { if (multiline) { @@ -137,19 +144,17 @@ streaming_query_report dictionary::streaming_query_from_file(std::string } zip_istream zis(is); if (canonical()) { - report = - streaming_query_from_fastq_file>(this, zis); + report = streaming_query_from_fastq_file(this, zis); } else { - report = - streaming_query_from_fastq_file>(this, zis); + report = streaming_query_from_fastq_file(this, zis); } } else if (util::ends_with(filename, ".fa") or util::ends_with(filename, ".fasta")) { if (canonical()) { - report = streaming_query_from_fasta_file>( - this, is, multiline); + report = streaming_query_from_fasta_file(this, is, + multiline); } else { - report = streaming_query_from_fasta_file>( - this, is, multiline); + report = streaming_query_from_fasta_file(this, is, + multiline); } } else if (util::ends_with(filename, ".fq") or util::ends_with(filename, ".fastq")) { if (multiline) { @@ -157,11 +162,9 @@ 
streaming_query_report dictionary::streaming_query_from_file(std::string << std::endl; } if (canonical()) { - report = - streaming_query_from_fastq_file>(this, is); + report = streaming_query_from_fastq_file(this, is); } else { - report = - streaming_query_from_fastq_file>(this, is); + report = streaming_query_from_fastq_file(this, is); } } else { std::cerr << "unsupported query file format" << std::endl; diff --git a/src/statistics.cpp b/src/statistics.cpp deleted file mode 100644 index 8ebc945..0000000 --- a/src/statistics.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "include/dictionary.hpp" -#include "include/buckets_statistics.hpp" -#include "include/minimizer_iterator.hpp" - -namespace sshash { - -template -void dictionary::compute_statistics() const // -{ - std::cout << "computing bucket statistics..." << std::endl; - - const uint64_t num_kmers = size(); - const uint64_t num_minimizers = m_minimizers.size(); - const uint64_t num_super_kmers = m_buckets.offsets.size(); - - buckets_statistics buckets_stats(num_minimizers, num_kmers, num_super_kmers); - minimizer_iterator minimizer_it(m_k, m_m, m_hasher); - - for (uint64_t bucket_id = 0; bucket_id != num_minimizers; ++bucket_id) { - const auto [begin, end] = m_buckets.locate_bucket(bucket_id); - const uint64_t bucket_size = end - begin; - buckets_stats.add_bucket_size(bucket_size); - for (uint64_t i = begin; i != end; ++i) { - const uint64_t pos_in_seq = m_buckets.offsets.access(i); - auto p = m_buckets.pieces.locate(pos_in_seq); - const uint64_t contig_begin = p.first.val; - const uint64_t contig_end = p.second.val; - uint64_t offset = pos_in_seq; - if (offset <= uint64_t(m_k - m_m)) { - assert(contig_begin == 0); - offset = 0; - } else if (offset - (m_k - m_m) < contig_begin) { - offset = contig_begin; - } else { - offset -= m_k - m_m; - } - kmer_iterator it(m_buckets.strings, m_k, kmer_t::bits_per_char * offset); - minimizer_it.set_position(offset); - uint64_t num_kmers_in_super_kmer = 0; - auto kmer = 
it.get(); - auto mini_info = minimizer_it.next(kmer); - while (mini_info.pos_in_seq < pos_in_seq) { - it.next(); - kmer = it.get(); - mini_info = minimizer_it.next(kmer); - } - while (mini_info.pos_in_seq == pos_in_seq and - (mini_info.pos_in_seq - mini_info.pos_in_kmer + m_k) <= contig_end) // - { - num_kmers_in_super_kmer += 1; - it.next(); - kmer = it.get(); - mini_info = minimizer_it.next(kmer); - } - assert(num_kmers_in_super_kmer > 0); - buckets_stats.add_num_kmers_in_super_kmer(bucket_size, num_kmers_in_super_kmer); - } - } - - buckets_stats.print_full(); - - std::cout << "DONE" << std::endl; -} - -} // namespace sshash diff --git a/test/check.cpp b/test/check.cpp index 2e2760f..cdcf3cd 100644 --- a/test/check.cpp +++ b/test/check.cpp @@ -1,12 +1,15 @@ #include #include +#include "include/kmer.hpp" #include "include/util.hpp" #include "external/gz/zip_stream.hpp" #include "external/pthash/external/cmd_line_parser/include/parser.hpp" using namespace sshash; +using kmer_type = default_kmer_t; + std::unordered_set parse_file(std::istream& is, const uint64_t k) { std::unordered_set kmers; @@ -31,10 +34,10 @@ std::unordered_set parse_file(std::istream& is, const uint64_t k) { for (uint64_t end = 0; end != sequence.length() - k + 1; ++end) { char const* kmer = sequence.data() + end; - assert(util::is_valid(kmer, k)); - default_kmer_t uint_kmer = util::string_to_uint_kmer(kmer, k); + assert(util::is_valid(kmer, k)); + kmer_type uint_kmer = util::string_to_uint_kmer(kmer, k); - kmers.insert(uint_kmer.kmer); + kmers.insert(uint_kmer.bits); ++num_kmers; } @@ -71,15 +74,15 @@ void query_from_fastq_file(std::string const& query_filename, if (line.size() >= k) { for (uint64_t i = 0; i != line.size() - k + 1; ++i) { char const* kmer = line.data() + i; - if (util::is_valid(kmer, k)) // + if (util::is_valid(kmer, k)) // { - default_kmer_t uint_kmer = util::string_to_uint_kmer(kmer, k); - if (auto it = kmers.find(uint_kmer.kmer); it != kmers.end()) { + kmer_type uint_kmer = 
util::string_to_uint_kmer(kmer, k); + if (auto it = kmers.find(uint_kmer.bits); it != kmers.end()) { num_positive_kmers += 1; } - default_kmer_t uint_kmer_rc = uint_kmer; + kmer_type uint_kmer_rc = uint_kmer; uint_kmer_rc.reverse_complement_inplace(k); - if (auto it = kmers.find(uint_kmer_rc.kmer); it != kmers.end()) { + if (auto it = kmers.find(uint_kmer_rc.bits); it != kmers.end()) { num_positive_kmers += 1; } } diff --git a/test/check.hpp b/test/check.hpp index 42475cf..e529f1a 100644 --- a/test/check.hpp +++ b/test/check.hpp @@ -1,40 +1,78 @@ #pragma once +#include + namespace sshash { -template -bool check_dictionary(dictionary const& dict) { +template +bool check_dictionary(Dict const& dict) { const uint64_t k = dict.k(); - const uint64_t n = dict.size(); - std::cout << "checking correctness of access and positive lookup..." << std::endl; - uint64_t id = 0; - std::string kmer(k, 0); - for (; id != n; ++id) { - if (id != 0 and id % 5000000 == 0) std::cout << "checked " << id << " kmers" << std::endl; - dict.access(id, kmer.data()); - uint64_t got_id = dict.lookup(kmer.c_str()); - if (got_id == constants::invalid_uint64) { - std::cout << "kmer '" << kmer << "' not found!" << std::endl; - return false; - } - if (got_id >= n) { - std::cout << "ERROR: id out of range " << got_id << "/" << n << std::endl; - return false; + const uint64_t n = dict.num_kmers(); + + const uint64_t num_threads = std::thread::hardware_concurrency(); + std::cout << "checking correctness of access and positive lookup using " << num_threads + << " threads..." 
<< std::endl; + + std::mutex print_mutex; + + auto worker = [&](uint64_t start, uint64_t end, uint64_t thread_id) { + std::string kmer(k, 0); + for (uint64_t id = start; id != end; ++id) // + { + uint64_t count = id - start; + if (count != 0 and count % 15'000'000 == 0) { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] Checked " << count + << " kmers (local progress)" << std::endl; + } + + dict.access(id, kmer.data()); + uint64_t got_id = dict.lookup(kmer.c_str()).kmer_id; + + if (got_id == constants::invalid_uint64) { + std::lock_guard lock(print_mutex); + std::cerr << "[Thread " << thread_id << "] kmer '" << kmer << "' not found!\n"; + return; + } + if (got_id >= n) { + std::lock_guard lock(print_mutex); + std::cerr << "[Thread " << thread_id << "] ERROR: id out of range " << got_id << "/" + << n << "\n"; + return; + } + if (got_id != id) { + std::lock_guard lock(print_mutex); + std::cerr << "[Thread " << thread_id << "] expected id " << id << " but got id " + << got_id << "\n"; + return; + } } - if (got_id != id) { - std::cout << "expected id " << id << " but got id " << got_id << std::endl; - return false; + { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] Finished range [" << start << ", " << end + << ")\n"; } + }; + + std::vector threads; + threads.reserve(num_threads); + for (uint64_t t = 0, chunk_size = (n + num_threads - 1) / num_threads; t != num_threads; ++t) { + uint64_t start = t * chunk_size; + uint64_t end = std::min(n, start + chunk_size); + threads.emplace_back(worker, start, end, t); } - std::cout << "checked " << id << " kmers" << std::endl; + + for (auto& th : threads) th.join(); + std::cout << "EVERYTHING OK!" 
<< std::endl; + return check_correctness_negative_lookup(dict); } -template -bool check_correctness_negative_lookup(dictionary const& dict) { +template +bool check_correctness_negative_lookup(Dict const& dict) { std::cout << "checking correctness of negative lookup with random kmers..." << std::endl; - const uint64_t num_lookups = std::min(1000000, dict.size()); + const uint64_t num_lookups = std::min(1000000, dict.num_kmers()); std::string kmer(dict.k(), 0); for (uint64_t i = 0; i != num_lookups; ++i) { random_kmer(kmer.data(), dict.k()); @@ -42,8 +80,8 @@ bool check_correctness_negative_lookup(dictionary const& dict) { We could use a std::unordered_set to check if kmer is really absent, but that would take much more memory... */ - uint64_t id = dict.lookup(kmer.c_str()); - if (id != constants::invalid_uint64) { + auto res = dict.lookup(kmer.c_str()); + if (res.kmer_id != constants::invalid_uint64) { std::cout << "kmer '" << kmer << "' found!" << std::endl; } } @@ -51,90 +89,201 @@ bool check_correctness_negative_lookup(dictionary const& dict) { return true; } -template -bool check_correctness_navigational_contig_query(dictionary const& dict) { - std::cout << "checking correctness of navigational queries for contigs..." 
<< std::endl; - const uint64_t num_contigs = dict.num_contigs(); +template +bool check_correctness_navigational_string_query(Dict const& dict) // +{ + using kmer_t = typename Dict::kmer_type; + const uint64_t num_strings = dict.num_strings(); const uint64_t k = dict.k(); - uint64_t kmer_id = 0; - std::string kmer(k, 0); - uint64_t contig_id = 0; - for (; contig_id != num_contigs; ++contig_id) { - if (contig_id != 0 and contig_id % 1000000 == 0) { - std::cout << "checked " << contig_id << "/" << num_contigs << " contigs" << std::endl; - } - auto res = dict.contig_neighbours(contig_id); - uint64_t contig_size = dict.contig_size(contig_id); + const uint64_t num_threads = std::thread::hardware_concurrency(); + std::cout << "checking correctness of navigational queries for strings using " << num_threads + << " threads ..." << std::endl; - uint64_t begin_kmer_id = kmer_id; - dict.access(begin_kmer_id, kmer.data()); - auto backward = dict.kmer_backward_neighbours(kmer.data()); - for (size_t i = 0; i < kmer_t::alphabet_size; i++) { - equal_lookup_result(backward.backward[i], res.backward[i]); - } + std::mutex print_mutex; + + auto worker = [&](uint64_t start, uint64_t end, uint64_t start_kmer_id, uint64_t thread_id) { + std::string kmer(k, 0); + uint64_t kmer_id = start_kmer_id; + + for (uint64_t string_id = start; string_id < end; ++string_id) { + if (string_id != start && (string_id - start) % 1'000'000 == 0) { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] checked " << (string_id - start) + << " strings (local progress)\n"; + } + + auto res = dict.string_neighbours(string_id); + uint64_t string_size = dict.string_size(string_id); + + // Check backward neighbours at beginning k-mer + uint64_t begin_kmer_id = kmer_id; + dict.access(begin_kmer_id, kmer.data()); + auto backward = dict.kmer_backward_neighbours(kmer.data()); + for (uint64_t i = 0; i < kmer_t::alphabet_size; i++) { + equal_lookup_result(backward.backward[i], res.backward[i]); 
+ } + + // Check forward neighbours at end k-mer + uint64_t end_kmer_id = kmer_id + string_size - 1; + dict.access(end_kmer_id, kmer.data()); + auto forward = dict.kmer_forward_neighbours(kmer.data()); + for (uint64_t i = 0; i < kmer_t::alphabet_size; i++) { + equal_lookup_result(forward.forward[i], res.forward[i]); + } - uint64_t end_kmer_id = kmer_id + contig_size - 1; - dict.access(end_kmer_id, kmer.data()); - auto forward = dict.kmer_forward_neighbours(kmer.data()); - for (size_t i = 0; i < kmer_t::alphabet_size; i++) { - equal_lookup_result(forward.forward[i], res.forward[i]); + kmer_id += string_size; + } + { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] Finished range [" << start << ", " << end + << ")\n"; } - kmer_id += contig_size; + }; + + std::vector threads; + threads.reserve(num_threads); + + for (uint64_t t = 0, current_start = 0, current_kmer_id = 0, + chunk_size = (num_strings + num_threads - 1) / num_threads; + t < num_threads && current_start < num_strings; ++t) // + { + uint64_t start = current_start; + uint64_t end = std::min(num_strings, start + chunk_size); + + // compute starting kmer_id for this thread + uint64_t start_kmer_id = current_kmer_id; + for (uint64_t i = start; i < end; ++i) current_kmer_id += dict.string_size(i); + + threads.emplace_back(worker, start, end, start_kmer_id, t); + current_start = end; } - std::cout << "checked " << contig_id << " contigs" << std::endl; + + for (auto& th : threads) th.join(); + + std::cout << "checked " << num_strings << " strings" << std::endl; std::cout << "EVERYTHING OK!" << std::endl; + return true; } -template -bool check_correctness_kmer_iterator(dictionary const& dict) { - std::cout << "checking correctness of kmer iterator..." 
<< std::endl; - std::string expected_kmer(dict.k(), 0); - constexpr uint64_t runs = 3; - essentials::uniform_int_rng distr(0, dict.size() - 1, essentials::get_random_seed()); - for (uint64_t run = 0; run != runs; ++run) { - uint64_t from_kmer_id = distr.gen(); - auto it = dict.at_kmer_id(from_kmer_id); - while (it.has_next()) { +template +bool check_correctness_kmer_iterator(Dict const& dict) // +{ + const uint64_t num_kmers = dict.num_kmers(); + const uint64_t k = dict.k(); + const uint64_t num_threads = std::thread::hardware_concurrency(); + std::cout << "checking correctness of kmer iterator using " << num_threads << " threads ..." + << std::endl; + + std::mutex print_mutex; + + auto worker = [&](uint64_t start, uint64_t end, uint64_t thread_id) { + assert(end > start); + std::string read_kmer(k, 0); + std::string expected_kmer(k, 0); + for (auto it = dict.at_kmer_id(start); start != end; ++start) { + uint64_t count = end - start; + if (count != 0 and count % 100'000'000 == 0) { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] Checked " << count + << " kmers (local progress)" << std::endl; + } auto [kmer_id, kmer] = it.next(); + util::uint_kmer_to_string(kmer, read_kmer.data(), k); dict.access(kmer_id, expected_kmer.data()); - if (kmer != expected_kmer or kmer_id != from_kmer_id) { - std::cout << "got (" << kmer_id << ",'" << kmer << "')"; - std::cout << " but "; - std::cout << "expected (" << from_kmer_id << ",'" << expected_kmer << "')" - << std::endl; - return false; + if (read_kmer != expected_kmer or kmer_id != start) { + std::lock_guard lock(print_mutex); + std::cerr << "[Thread " << thread_id << "] "; + std::cerr << "got (" << kmer_id << ",'" << read_kmer << "')"; + std::cerr << " but "; + std::cerr << "expected (" << start << ",'" << expected_kmer << "')" << std::endl; + return; } - ++from_kmer_id; } - assert(from_kmer_id == dict.size()); + { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << 
"] Finished range [" << start << ", " << end + << ")\n"; + } + }; + + std::vector threads; + threads.reserve(num_threads); + for (uint64_t t = 0, chunk_size = (num_kmers + num_threads - 1) / num_threads; t != num_threads; + ++t) { + uint64_t start = t * chunk_size; + uint64_t end = std::min(num_kmers, start + chunk_size); + threads.emplace_back(worker, start, end, t); } + + for (auto& th : threads) th.join(); + std::cout << "EVERYTHING OK!" << std::endl; + return true; } -template -bool check_correctness_contig_iterator(dictionary const& dict) { - std::cout << "checking correctness of contig iterator..." << std::endl; - std::string expected_kmer(dict.k(), 0); - for (uint64_t contig_id = 0; contig_id != dict.num_contigs(); ++contig_id) { - auto [begin, _] = dict.contig_offsets(contig_id); - uint64_t from_kmer_id = begin - contig_id * (dict.k() - 1); - auto it = dict.at_contig_id(contig_id); - while (it.has_next()) { - auto [kmer_id, kmer] = it.next(); - dict.access(kmer_id, expected_kmer.data()); - if (kmer != expected_kmer or kmer_id != from_kmer_id) { - std::cout << "got (" << kmer_id << ",'" << kmer << "')"; - std::cout << " but "; - std::cout << "expected (" << from_kmer_id << ",'" << expected_kmer << "')" - << std::endl; - return false; +template +bool check_correctness_string_iterator(Dict const& dict) { + const uint64_t k = dict.k(); + const uint64_t num_strings = dict.num_strings(); + + const uint64_t num_threads = std::thread::hardware_concurrency(); + std::cout << "checking correctness of string iterator using " << num_threads << " threads..." 
+ << std::endl; + + std::mutex print_mutex; + + auto worker = [&](uint64_t start, uint64_t end, uint64_t thread_id) { + std::string read_kmer(k, 0); + std::string expected_kmer(k, 0); + + for (uint64_t string_id = start; string_id < end; ++string_id) { + auto [begin, _] = dict.string_offsets(string_id); + uint64_t from_kmer_id = begin - string_id * (dict.k() - 1); + auto it = dict.at_string_id(string_id); + + while (it.has_next()) { + auto [kmer_id, kmer] = it.next(); + util::uint_kmer_to_string(kmer, read_kmer.data(), k); + dict.access(kmer_id, expected_kmer.data()); + + if (read_kmer != expected_kmer || kmer_id != from_kmer_id) { + std::lock_guard lock(print_mutex); + std::cerr << "[Thread " << thread_id << "] ERROR at string_id " << string_id + << ": got (" << kmer_id << ", '" << read_kmer << "') but expected (" + << from_kmer_id << ", '" << expected_kmer << "')\n"; + return; + } + ++from_kmer_id; + } + + if ((string_id - start) % 1'000'000 == 0 && string_id != start) { + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] checked " << (string_id - start) + << " strings (local progress)\n"; } - ++from_kmer_id; } + + std::lock_guard lock(print_mutex); + std::cout << "[Thread " << thread_id << "] Finished range [" << start << ", " << end + << ")\n"; + }; + + std::vector threads; + threads.reserve(num_threads); + for (uint64_t t = 0, chunk_size = (num_strings + num_threads - 1) / num_threads; + t < num_threads; ++t) // + { + uint64_t start = t * chunk_size; + uint64_t end = std::min(num_strings, start + chunk_size); + if (start >= end) break; + threads.emplace_back(worker, start, end, t); } + + for (auto& th : threads) th.join(); + std::cout << "EVERYTHING OK!" 
<< std::endl; return true; } diff --git a/test/check_from_file.hpp b/test/check_from_file.hpp index b0f4a7c..8c78e42 100644 --- a/test/check_from_file.hpp +++ b/test/check_from_file.hpp @@ -6,14 +6,16 @@ namespace sshash { -template -bool check_correctness_lookup_access(std::istream& is, dictionary const& dict) { +template +bool check_correctness_lookup_access(std::istream& is, Dict const& dict) // +{ + using kmer_t = typename Dict::kmer_type; const uint64_t k = dict.k(); std::string sequence; uint64_t num_kmers = 0; uint64_t num_sequences = 0; lookup_result prev; - prev.contig_id = 0; + prev.string_id = 0; std::string got_kmer_str(k, 0); std::string expected_kmer_str(k, 0); @@ -22,11 +24,11 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& while (!is.eof()) // { - if constexpr (fmt == input_file_type::cf_seg) { + if constexpr (fmt == input_file_t::cf_seg) { std::getline(is, sequence, '\t'); // skip '\t' std::getline(is, sequence); // DNA sequence } else { - static_assert(fmt == input_file_type::fasta); + static_assert(fmt == input_file_t::fasta); std::getline(is, sequence); // header sequence std::getline(is, sequence); // DNA sequence } @@ -45,6 +47,8 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& for (uint64_t i = 0; i + k <= sequence.length(); ++i) { assert(util::is_valid(sequence.data() + i, k)); + // std::cout << "kmer = '" << std::string(sequence.data() + i, k) << "'" << std::endl; + kmer_t uint_kmer = util::string_to_uint_kmer(sequence.data() + i, k); auto orientation = constants::forward_orientation; @@ -59,21 +63,18 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& } util::uint_kmer_to_string(uint_kmer, expected_kmer_str.data(), k); - uint64_t id = dict.lookup(expected_kmer_str.c_str()); + auto curr = dict.lookup(expected_kmer_str.c_str()); /* Since we assume that we stream through the file from which the index was built, - ids are assigned sequentially to kmers, so it must 
be id == num_kmers. + ids are assigned sequentially to kmers, so it must be curr.kmer_id == num_kmers. */ - if (id != num_kmers) std::cout << "wrong id assigned" << std::endl; + if (curr.kmer_id != num_kmers) std::cout << "wrong id assigned" << std::endl; - if (id == constants::invalid_uint64) { + if (curr.kmer_id == constants::invalid_uint64) { std::cout << "kmer '" << expected_kmer_str << "' not found!" << std::endl; } - assert(id != constants::invalid_uint64); - - auto curr = dict.lookup_advanced(expected_kmer_str.c_str()); - assert(curr.kmer_id == id); + assert(curr.kmer_id != constants::invalid_uint64); if (curr.kmer_orientation != orientation) { std::cout << "ERROR: got orientation " << int(curr.kmer_orientation) @@ -81,11 +82,13 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& } assert(curr.kmer_orientation == orientation); + uint64_t curr_string_size = curr.string_end - curr.string_begin - k + 1; + if (num_kmers == 0) { - if (curr.contig_id != 0) { - std::cout << "contig_id " << curr.contig_id << " but expected 0" << std::endl; + if (curr.string_id != 0) { + std::cout << "string_id " << curr.string_id << " but expected 0" << std::endl; } - assert(curr.contig_id == 0); // at the beginning, contig_id must be 0 + assert(curr.string_id == 0); // at the beginning, string_id must be 0 } else { if (curr.kmer_id != prev.kmer_id + 1) { std::cout << "ERROR: got curr.kmer_id " << curr.kmer_id << " but expected " @@ -93,62 +96,64 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& } assert(curr.kmer_id == prev.kmer_id + 1); // kmer_id must be sequential - if (curr.kmer_id_in_contig >= curr.contig_size) { - std::cout << "ERROR: got curr.kmer_id_in_contig " << curr.kmer_id_in_contig - << " but expected something < " << curr.contig_size << std::endl; + if (curr.kmer_id_in_string >= curr_string_size) { + std::cout << "ERROR: got curr.kmer_id_in_string " << curr.kmer_id_in_string + << " but expected something < " << 
curr_string_size << std::endl; } - assert(curr.kmer_id_in_contig < - curr.contig_size); // kmer_id_in_contig must always be < contig_size - - if (curr.contig_id == prev.contig_id) { - /* same contig */ - if (curr.contig_size != prev.contig_size) { - std::cout << "ERROR: got curr.contig_size " << curr.contig_size - << " but expected " << prev.contig_size << std::endl; + assert(curr.kmer_id_in_string < + curr_string_size); // kmer_id_in_string must always be < string_size + + if (curr.string_id == prev.string_id) { + /* same string */ + uint64_t prev_string_size = prev.string_end - prev.string_begin - k + 1; + if (curr_string_size != prev_string_size) { + std::cout << "ERROR: got curr_string_size " << curr_string_size + << " but expected " << prev_string_size << std::endl; } - assert(curr.contig_size == prev.contig_size); // contig_size must be same - if (curr.kmer_id_in_contig != prev.kmer_id_in_contig + 1) { - std::cout << "ERROR: got curr.kmer_id_in_contig " << curr.kmer_id_in_contig - << " but expected " << prev.kmer_id_in_contig + 1 << std::endl; + if (curr.kmer_id_in_string != prev.kmer_id_in_string + 1) { + std::cout << "ERROR: got curr.kmer_id_in_string " << curr.kmer_id_in_string + << " but expected " << prev.kmer_id_in_string + 1 << std::endl; } - assert(curr.kmer_id_in_contig == - prev.kmer_id_in_contig + 1); // kmer_id_in_contig must be sequential + assert(curr.kmer_id_in_string == + prev.kmer_id_in_string + 1); // kmer_id_in_string must be sequential } else { - /* we have changed contig */ - if (curr.contig_id != prev.contig_id + 1) { - std::cout << "ERROR: got curr.contig_id " << curr.contig_id - << " but expected " << prev.contig_id + 1 << std::endl; + /* we have changed string */ + if (curr.string_id != prev.string_id + 1) { + std::cout << "ERROR: got curr.string_id " << curr.string_id + << " but expected " << prev.string_id + 1 << std::endl; } - assert(curr.contig_id == - prev.contig_id + 1); // contig_id must be sequential since we stream - if 
(curr.kmer_id_in_contig != 0) { - std::cout << "ERROR: got curr.kmer_id_in_contig " << curr.kmer_id_in_contig + assert(curr.string_id == + prev.string_id + 1); // string_id must be sequential since we stream + if (curr.kmer_id_in_string != 0) { + std::cout << "ERROR: got curr.kmer_id_in_string " << curr.kmer_id_in_string << " but expected 0" << std::endl; } - assert(curr.kmer_id_in_contig == - 0); // kmer_id_in_contig must be 0 when we change contig + assert(curr.kmer_id_in_string == + 0); // kmer_id_in_string must be 0 when we change string } } - /* check also contig_size() */ - uint64_t contig_size = dict.contig_size(curr.contig_id); - if (contig_size != curr.contig_size) { - std::cout << "ERROR: got contig_size " << contig_size << " but expected " - << curr.contig_size << std::endl; + /* check also string_size() */ + uint64_t string_size = dict.string_size(curr.string_id); + if (string_size != curr_string_size) { + std::cout << "ERROR: got string_size " << string_size << " but expected " + << curr_string_size << std::endl; } - assert(contig_size == curr.contig_size); + assert(string_size == curr_string_size); prev = curr; // check access - dict.access(id, got_kmer_str.data()); + dict.access(curr.kmer_id, got_kmer_str.data()); kmer_t got_uint_kmer = util::string_to_uint_kmer(got_kmer_str.data(), k); kmer_t got_uint_kmer_rc = got_uint_kmer; got_uint_kmer_rc.reverse_complement_inplace(k); if (got_uint_kmer != uint_kmer and got_uint_kmer_rc != uint_kmer) { std::cout << "ERROR: got '" << got_kmer_str << "' but expected '" << expected_kmer_str << "'" << std::endl; + return false; } + ++num_kmers; } } @@ -158,8 +163,10 @@ bool check_correctness_lookup_access(std::istream& is, dictionary const& return check_correctness_negative_lookup(dict); } -template -bool check_correctness_navigational_kmer_query(std::istream& is, dictionary const& dict) { +template +bool check_correctness_navigational_kmer_query(std::istream& is, Dict const& dict) // +{ + using kmer_t = typename 
Dict::kmer_type; const uint64_t k = dict.k(); std::string sequence; uint64_t num_kmers = 0; @@ -168,11 +175,11 @@ bool check_correctness_navigational_kmer_query(std::istream& is, dictionary -bool check_correctness_weights(std::istream& is, dictionary const& dict) { +template +bool check_correctness_weights(std::istream& is, Dict const& dict) { uint64_t k = dict.k(); std::string line; uint64_t kmer_id = 0; @@ -267,23 +274,24 @@ bool check_correctness_weights(std::istream& is, dictionary const& dict) The input file must be the one the index was built from. Throughout the code, we assume the input does not contain any duplicate. */ -template -bool check_correctness_lookup_access(dictionary const& dict, std::string const& filename) { +template +bool check_correctness_lookup_access(Dict const& dict, std::string const& filename) // +{ std::ifstream is(filename.c_str()); if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); bool good = true; if (util::ends_with(filename, ".gz")) { zip_istream zis(is); if (util::ends_with(filename, ".cf_seg.gz")) { - good = check_correctness_lookup_access(zis, dict); + good = check_correctness_lookup_access(zis, dict); } else { - good = check_correctness_lookup_access(zis, dict); + good = check_correctness_lookup_access(zis, dict); } } else { if (util::ends_with(filename, ".cf_seg")) { - good = check_correctness_lookup_access(is, dict); + good = check_correctness_lookup_access(is, dict); } else { - good = check_correctness_lookup_access(is, dict); + good = check_correctness_lookup_access(is, dict); } } is.close(); @@ -294,28 +302,23 @@ bool check_correctness_lookup_access(dictionary const& dict, std::string The input file must be the one the index was built from. Throughout the code, we assume the input does not contain any duplicate. 
*/ -template -bool check_correctness_navigational_kmer_query(dictionary const& dict, - std::string const& filename) { +template +bool check_correctness_navigational_kmer_query(Dict const& dict, std::string const& filename) { std::ifstream is(filename.c_str()); if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); bool good = true; if (util::ends_with(filename, ".gz")) { zip_istream zis(is); if (util::ends_with(filename, ".cf_seg.gz")) { - good = check_correctness_navigational_kmer_query(zis, - dict); + good = check_correctness_navigational_kmer_query(zis, dict); } else { - good = check_correctness_navigational_kmer_query(zis, - dict); + good = check_correctness_navigational_kmer_query(zis, dict); } } else { if (util::ends_with(filename, ".cf_seg")) { - good = check_correctness_navigational_kmer_query(is, - dict); + good = check_correctness_navigational_kmer_query(is, dict); } else { - good = - check_correctness_navigational_kmer_query(is, dict); + good = check_correctness_navigational_kmer_query(is, dict); } } is.close(); @@ -326,8 +329,8 @@ bool check_correctness_navigational_kmer_query(dictionary const& dict, The input file must be the one the index was built from. Only for FASTA files since CUTTLEFISH does not store kmer weights. 
*/ -template -bool check_correctness_weights(dictionary const& dict, std::string const& filename) { +template +bool check_correctness_weights(Dict const& dict, std::string const& filename) { std::ifstream is(filename.c_str()); if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'"); bool good = true; diff --git a/test/test_alphabet.cpp b/test/test_alphabet.cpp index 42b8df4..5e798db 100644 --- a/test/test_alphabet.cpp +++ b/test/test_alphabet.cpp @@ -1,3 +1,4 @@ +#include "include/kmer.hpp" #include "include/util.hpp" #include "tools/common.hpp" // for random_kmer diff --git a/tools/build.cpp b/tools/build.cpp index 9c9d738..f0dab7e 100644 --- a/tools/build.cpp +++ b/tools/build.cpp @@ -1,6 +1,5 @@ using namespace sshash; -template int build(int argc, char** argv) { cmd_line_parser::parser parser(argc, argv); @@ -14,7 +13,8 @@ int build(int argc, char** argv) { "\t- one DNA sequence per line.\n" "\tFor example, it could be the de Bruijn graph topology output by BCALM or CUTTLEFISH.", "-i", true); - parser.add("k", "K-mer length (must be <= " + std::to_string(kmer_t::max_k) + ").", "-k", true); + parser.add("k", "K-mer length (must be <= " + std::to_string(default_kmer_t::max_k) + ").", + "-k", true); parser.add("m", "Minimizer length (must be < k).", "-m", true); /* Optional arguments. */ @@ -22,12 +22,6 @@ int build(int argc, char** argv) { "Seed for construction (default is " + std::to_string(constants::seed) + ").", "-s", false); parser.add("t", "Number of threads (default is 1).", "-t", false); - parser.add("l", - "A (integer) constant that controls the space/time trade-off of the dictionary. " - "A reasonable values lies in [2.." + - std::to_string(constants::max_l) + "). The default value is " + - std::to_string(constants::min_l) + ".", - "-l", false); parser.add("lambda", "A (floating point) constant that trades construction speed for space effectiveness " "of minimal perfect hashing. 
" @@ -51,7 +45,6 @@ int build(int argc, char** argv) { parser.add("weighted", "Also store the weights in compressed format.", "--weighted", false, true); parser.add("check", "Check correctness after construction.", "--check", false, true); - parser.add("bench", "Run benchmark after construction.", "--bench", false, true); parser.add("verbose", "Verbose output during construction.", "--verbose", false, true); if (!parser.parse()) return 0; @@ -60,14 +53,11 @@ int build(int argc, char** argv) { auto k = parser.get("k"); auto m = parser.get("m"); - dictionary dict; - build_configuration build_config; build_config.k = k; build_config.m = m; if (parser.parsed("seed")) build_config.seed = parser.get("seed"); - if (parser.parsed("l")) build_config.l = parser.get("l"); if (parser.parsed("lambda")) build_config.lambda = parser.get("lambda"); build_config.canonical = parser.get("canonical"); build_config.weighted = parser.get("weighted"); @@ -81,26 +71,22 @@ int build(int argc, char** argv) { } if (parser.parsed("t")) build_config.num_threads = parser.get("t"); - build_config.print(); + // build_config.print(); + essentials::logger("building data structure..."); + dictionary_type dict; dict.build(input_filename, build_config); - assert(dict.k() == k); bool check = parser.get("check"); if (check) { check_correctness_lookup_access(dict, input_filename); check_correctness_navigational_kmer_query(dict, input_filename); - check_correctness_navigational_contig_query(dict); + check_correctness_navigational_string_query(dict); if (build_config.weighted) check_correctness_weights(dict, input_filename); check_correctness_kmer_iterator(dict); - check_correctness_contig_iterator(dict); - } - bool bench = parser.get("bench"); - if (bench) { - perf_test_lookup_access(dict); - if (dict.weighted()) perf_test_lookup_weight(dict); - perf_test_iterator(dict); + check_correctness_string_iterator(dict); } + if (parser.parsed("output_filename")) { auto output_filename = 
parser.get("output_filename"); essentials::logger("saving data structure to disk..."); diff --git a/tools/common.hpp b/tools/common.hpp index 6ee652d..42acb52 100644 --- a/tools/common.hpp +++ b/tools/common.hpp @@ -1,23 +1,28 @@ #pragma once #include "external/pthash/external/cmd_line_parser/include/parser.hpp" -#include "include/dictionary.hpp" +#include "include/dictionary_types.hpp" #include namespace sshash { +void print_cmd(int argc, char** argv) { + for (int i = 0; i != argc; ++i) std::cout << argv[i] << ' '; + std::cout << std::endl; +} + void random_kmer(char* kmer, uint64_t k) { for (uint64_t i = 0; i != k; ++i) kmer[i] = "ACGT"[rand() % 4]; } -template -void load_dictionary(dictionary& dict, std::string const& index_filename, bool verbose) { +template +void load_dictionary(Dict& dict, std::string const& index_filename, bool verbose) { const uint64_t num_bytes_read = essentials::load(dict, index_filename.c_str()); if (verbose) { std::cout << "total index size: " << num_bytes_read << " [B] -- " << essentials::convert(num_bytes_read, essentials::MB) << " [MB] (" - << (num_bytes_read * 8.0) / dict.size() << " [bits/kmer])" << std::endl; + << (num_bytes_read * 8.0) / dict.num_kmers() << " [bits/kmer])" << std::endl; dict.print_info(); } } diff --git a/tools/perf.hpp b/tools/perf.hpp index 9062b60..7eabb4e 100644 --- a/tools/perf.hpp +++ b/tools/perf.hpp @@ -2,32 +2,40 @@ namespace sshash { -template -void perf_test_iterator(dictionary const& dict) { - essentials::timer t; +namespace perf { +using timer_type = essentials::timer; +} + +template +void perf_test_iterator(Dict const& dict, essentials::json_lines& perf_stats) { + perf::timer_type t; t.start(); auto it = dict.begin(); - while (it.has_next()) { + uint64_t n = std::min(dict.num_kmers(), 100'000'000); + for (uint64_t i = 0; i != n; ++i) { auto [kmer_id, kmer] = it.next(); essentials::do_not_optimize_away(kmer_id); - essentials::do_not_optimize_away(kmer[0]); + 
essentials::do_not_optimize_away(kmer.at(0)); } t.stop(); - double avg_nanosec = t.elapsed() / dict.size(); - std::cout << "iterator: avg_nanosec_per_kmer " << avg_nanosec << std::endl; + double avg_nanosec = t.elapsed() / n; + std::cout << "iterator (avg_nanosec_per_kmer) = " << avg_nanosec << std::endl; + perf_stats.add("iterator (avg_nanosec_per_kmer)", avg_nanosec); } -template -void perf_test_lookup_access(dictionary const& dict) { - constexpr uint64_t num_queries = 1000000; +template +void perf_test_lookup_access(Dict const& dict, essentials::json_lines& perf_stats) // +{ + using kmer_t = typename Dict::kmer_type; + constexpr uint64_t num_queries = 1'000'000; constexpr uint64_t runs = 5; - essentials::uniform_int_rng distr(0, dict.size() - 1, essentials::get_random_seed()); - uint64_t k = dict.k(); + essentials::uniform_int_rng distr(0, dict.num_kmers() - 1, + essentials::get_random_seed()); + const uint64_t k = dict.k(); std::string kmer(k, 0); std::string kmer_rc(k, 0); { - // perf test positive lookup std::vector lookup_queries; lookup_queries.reserve(num_queries); for (uint64_t i = 0; i != num_queries; ++i) { @@ -41,92 +49,49 @@ void perf_test_lookup_access(dictionary const& dict) { lookup_queries.push_back(kmer); } } - essentials::timer t; - t.start(); - for (uint64_t r = 0; r != runs; ++r) { - for (auto const& string : lookup_queries) { - auto id = dict.lookup(string.c_str()); - essentials::do_not_optimize_away(id); - } - } - t.stop(); - double nanosec_per_lookup = t.elapsed() / (runs * lookup_queries.size()); - std::cout << "avg_nanosec_per_positive_lookup " << nanosec_per_lookup << std::endl; - } - { - // perf test negative lookup - std::vector lookup_queries; - lookup_queries.reserve(num_queries); - for (uint64_t i = 0; i != num_queries; ++i) { - random_kmer(kmer.data(), k); - lookup_queries.push_back(kmer); - } - essentials::timer t; - t.start(); - for (uint64_t r = 0; r != runs; ++r) { - for (auto const& string : lookup_queries) { - auto id = 
dict.lookup(string.c_str()); - essentials::do_not_optimize_away(id); - } - } - t.stop(); - double nanosec_per_lookup = t.elapsed() / (runs * lookup_queries.size()); - std::cout << "avg_nanosec_per_negative_lookup " << nanosec_per_lookup << std::endl; - } - { - // perf test positive lookup_advanced - std::vector lookup_queries; - lookup_queries.reserve(num_queries); - for (uint64_t i = 0; i != num_queries; ++i) { - uint64_t id = distr.gen(); - dict.access(id, kmer.data()); - if ((i & 1) == 0) { - /* transform 50% of the kmers into their reverse complements */ - kmer_t::compute_reverse_complement(kmer.data(), kmer_rc.data(), k); - lookup_queries.push_back(kmer_rc); - } else { - lookup_queries.push_back(kmer); - } - } - essentials::timer t; + + perf::timer_type t; t.start(); for (uint64_t r = 0; r != runs; ++r) { for (auto const& string : lookup_queries) { - auto res = dict.lookup_advanced(string.c_str()); + auto res = dict.lookup(string.c_str()); essentials::do_not_optimize_away(res.kmer_id); } } t.stop(); double nanosec_per_lookup = t.elapsed() / (runs * lookup_queries.size()); - std::cout << "avg_nanosec_per_positive_lookup_advanced " << nanosec_per_lookup << std::endl; + std::cout << "positive lookup (avg_nanosec_per_kmer) = " << nanosec_per_lookup << std::endl; + perf_stats.add("positive lookup (avg_nanosec_per_kmer)", nanosec_per_lookup); } + { - // perf test negative lookup_advanced + // perf test negative lookup std::vector lookup_queries; lookup_queries.reserve(num_queries); for (uint64_t i = 0; i != num_queries; ++i) { random_kmer(kmer.data(), k); lookup_queries.push_back(kmer); } - essentials::timer t; + perf::timer_type t; t.start(); for (uint64_t r = 0; r != runs; ++r) { for (auto const& string : lookup_queries) { - auto res = dict.lookup_advanced(string.c_str()); + auto res = dict.lookup(string.c_str()); essentials::do_not_optimize_away(res.kmer_id); } } t.stop(); double nanosec_per_lookup = t.elapsed() / (runs * lookup_queries.size()); - std::cout << 
"avg_nanosec_per_negative_lookup_advanced " << nanosec_per_lookup << std::endl; + std::cout << "negative lookup (avg_nanosec_per_kmer) " << nanosec_per_lookup << std::endl; + perf_stats.add("negative lookup (avg_nanosec_per_kmer)", nanosec_per_lookup); } + { // perf test access std::vector access_queries; access_queries.reserve(num_queries); - for (uint64_t i = 0; i != num_queries; ++i) access_queries.push_back(distr.gen()); - essentials::timer t; + perf::timer_type t; t.start(); for (uint64_t r = 0; r != runs; ++r) { for (auto id : access_queries) { @@ -136,20 +101,25 @@ void perf_test_lookup_access(dictionary const& dict) { } t.stop(); double nanosec_per_access = t.elapsed() / static_cast(runs * access_queries.size()); - std::cout << "avg_nanosec_per_access " << nanosec_per_access << std::endl; + std::cout << "access (avg_nanosec_per_kmer) = " << nanosec_per_access << std::endl; + perf_stats.add("access (avg_nanosec_per_kmer)", nanosec_per_access); } } -template -void perf_test_lookup_weight(dictionary const& dict) { +template +void perf_test_lookup_weight(Dict const& dict, essentials::json_lines& perf_stats) // +{ + using kmer_t = typename Dict::kmer_type; + if (!dict.weighted()) { std::cerr << "ERROR: the dictionary does not store weights" << std::endl; return; } - constexpr uint64_t num_queries = 1000000; + constexpr uint64_t num_queries = 1'000'000; constexpr uint64_t runs = 5; - essentials::uniform_int_rng distr(0, dict.size() - 1, essentials::get_random_seed()); + essentials::uniform_int_rng distr(0, dict.num_kmers() - 1, + essentials::get_random_seed()); uint64_t k = dict.k(); std::string kmer(k, 0); std::string kmer_rc(k, 0); @@ -168,18 +138,20 @@ void perf_test_lookup_weight(dictionary const& dict) { } } - essentials::timer t; + perf::timer_type t; t.start(); for (uint64_t r = 0; r != runs; ++r) { for (auto const& string : lookup_queries) { - auto id = dict.lookup(string.c_str()); - auto w = dict.weight(id); + auto res = dict.lookup(string.c_str()); + 
auto w = dict.weight(res.kmer_id); essentials::do_not_optimize_away(w); } } t.stop(); double nanosec_per_lookup = t.elapsed() / (runs * lookup_queries.size()); - std::cout << "avg_nanosec_per_positive_lookup_with_weight " << nanosec_per_lookup << std::endl; + std::cout << "positive lookup + weight (avg_nanosec_per_kmer) = " << nanosec_per_lookup + << std::endl; + perf_stats.add("positive lookup + weight (avg_nanosec_per_kmer)", nanosec_per_lookup); } } // namespace sshash \ No newline at end of file diff --git a/tools/permute.cpp b/tools/permute.cpp index 659ebf1..5dd1b1a 100644 --- a/tools/permute.cpp +++ b/tools/permute.cpp @@ -3,7 +3,6 @@ using namespace sshash; -template int permute(int argc, char** argv) { cmd_line_parser::parser parser(argc, argv); @@ -17,7 +16,8 @@ int permute(int argc, char** argv) { "\tFor example, it could be the de Bruijn graph topology output " "by BCALM.", "-i", true); - parser.add("k", "K-mer length (must be <= " + std::to_string(kmer_t::max_k) + ").", "-k", true); + parser.add("k", "K-mer length (must be <= " + std::to_string(default_kmer_t::max_k) + ").", + "-k", true); /* Optional arguments. 
*/ parser.add("output_filename", "Output file where the permuted collection will be written.", @@ -38,8 +38,8 @@ int permute(int argc, char** argv) { std::cerr << "k must be > 0" << std::endl; return 1; } - if (k > kmer_t::max_k) { - std::cerr << "k must be less <= " + std::to_string(kmer_t::max_k) + + if (k > default_kmer_t::max_k) { + std::cerr << "k must be less <= " + std::to_string(default_kmer_t::max_k) + " but got k = " + std::to_string(k) << std::endl; return 1; @@ -94,7 +94,8 @@ int permute(int argc, char** argv) { } /* permute and save to output file */ - permute_and_write(input_filename, output_filename, tmp_dirname, permutation, signs, k); + permute_and_write(input_filename, output_filename, tmp_dirname, permutation, + signs, k); std::remove(permutation_filename.c_str()); return 0; diff --git a/tools/query.cpp b/tools/query.cpp index 0127907..67eec13 100644 --- a/tools/query.cpp +++ b/tools/query.cpp @@ -3,7 +3,6 @@ using namespace sshash; -template int query(int argc, char** argv) { cmd_line_parser::parser parser(argc, argv); parser.add("index_filename", "Must be a file generated with the tool 'build'.", "-i", true); @@ -23,16 +22,27 @@ int query(int argc, char** argv) { bool verbose = parser.get("verbose"); bool multiline = parser.get("multiline"); - dictionary dict; + dictionary_type dict; load_dictionary(dict, index_filename, verbose); essentials::logger("performing queries from file '" + query_filename + "'..."); - essentials::timer t; + essentials::timer t; t.start(); auto report = dict.streaming_query_from_file(query_filename, multiline); t.stop(); essentials::logger("DONE"); + essentials::json_lines query_stats; + query_stats.add("index_filename", index_filename.c_str()); + query_stats.add("query_filename", query_filename.c_str()); + query_stats.add("num_kmers", report.num_kmers); + query_stats.add("num_positive_kmers", report.num_positive_kmers); + query_stats.add("num_negative_kmers", report.num_negative_kmers); + 
query_stats.add("num_invalid_kmers", report.num_invalid_kmers); + query_stats.add("num_searches", report.num_searches); + query_stats.add("num_extensions", report.num_extensions); + query_stats.add("elapsed_millisec", uint64(t.elapsed())); + std::cout << "==== query report:\n"; std::cout << "num_kmers = " << report.num_kmers << std::endl; std::cout << "num_positive_kmers = " << report.num_positive_kmers << " (" @@ -47,10 +57,11 @@ int query(int argc, char** argv) { std::cout << "num_extensions = " << report.num_extensions << "/" << report.num_positive_kmers << " (" << (report.num_extensions * 100.0) / report.num_positive_kmers << "%)" << std::endl; - std::cout << "elapsed = " << t.elapsed() / 1000 << " millisec / "; - std::cout << t.elapsed() / 1000000 << " sec / "; - std::cout << t.elapsed() / 1000000 / 60 << " min / "; - std::cout << (t.elapsed() * 1000) / report.num_kmers << " ns/kmer" << std::endl; + std::cout << "elapsed = " << t.elapsed() / 1000 << " sec / "; + std::cout << t.elapsed() / 1000 / 60 << " min / "; + std::cout << (t.elapsed() * 1e6) / report.num_kmers << " ns/kmer" << std::endl; + + query_stats.print(); return 0; } \ No newline at end of file diff --git a/tools/sshash.cpp b/tools/sshash.cpp index 62d81de..446d974 100644 --- a/tools/sshash.cpp +++ b/tools/sshash.cpp @@ -10,7 +10,6 @@ #include "src/dictionary.cpp" #include "src/query.cpp" #include "src/info.cpp" -#include "src/statistics.cpp" #include "build.cpp" #include "query.cpp" @@ -18,7 +17,6 @@ using namespace sshash; -template int check(int argc, char** argv) { cmd_line_parser::parser parser(argc, argv); parser.add("index_filename", "Must be a file generated with the tool 'build'.", "-i", true); @@ -26,16 +24,15 @@ int check(int argc, char** argv) { if (!parser.parse()) return 0; auto index_filename = parser.get("index_filename"); bool verbose = parser.get("verbose"); - dictionary dict; + dictionary_type dict; load_dictionary(dict, index_filename, verbose); check_dictionary(dict); - 
check_correctness_navigational_contig_query(dict); + check_correctness_navigational_string_query(dict); check_correctness_kmer_iterator(dict); - check_correctness_contig_iterator(dict); + check_correctness_string_iterator(dict); return 0; } -template int bench(int argc, char** argv) { cmd_line_parser::parser parser(argc, argv); parser.add("index_filename", "Must be a file generated with the tool 'build'.", "-i", true); @@ -43,58 +40,59 @@ int bench(int argc, char** argv) { if (!parser.parse()) return 0; auto index_filename = parser.get("index_filename"); bool verbose = parser.get("verbose"); - dictionary dict; + dictionary_type dict; load_dictionary(dict, index_filename, verbose); - perf_test_lookup_access(dict); - if (dict.weighted()) perf_test_lookup_weight(dict); - perf_test_iterator(dict); - return 0; -} -template -int compute_statistics(int argc, char** argv) { - cmd_line_parser::parser parser(argc, argv); - parser.add("index_filename", "Must be a file generated with the tool 'build'.", "-i", true); - parser.add("verbose", "Verbose output.", "--verbose", false, true); - if (!parser.parse()) return 0; - auto index_filename = parser.get("index_filename"); - bool verbose = parser.get("verbose"); - dictionary dict; - load_dictionary(dict, index_filename, verbose); - dict.compute_statistics(); + essentials::json_lines perf_stats; + perf_stats.add("index_filename", index_filename.c_str()); + perf_stats.add("k", dict.k()); + perf_stats.add("m", dict.m()); + perf_stats.add("canonical", dict.canonical() ? 
"true" : "false"); + + perf_test_lookup_access(dict, perf_stats); + if (dict.weighted()) perf_test_lookup_weight(dict, perf_stats); + perf_test_iterator(dict, perf_stats); + + perf_stats.print(); + return 0; } int help(char* arg0) { - std::cout << "== SSHash: (S)parse and (S)kew (Hash)ing of k-mers =========================" - << std::endl + std::cout << "== SSHash: (S)parse and (S)kew (Hash)ing of k-mers "; + std::cout << "(v" + << essentials::version_number(constants::current_version_number::x, + constants::current_version_number::y, + constants::current_version_number::z) + .to_string() + << ") ==" << std::endl << std::endl; std::cout << "Usage: " << arg0 << " ...\n\n" << "Available tools:\n" - << " build \t build a dictionary \n" - << " query \t query a dictionary \n" - << " check \t check correctness of a dictionary \n" - << " bench \t run performance tests for a dictionary \n" - << " permute \t permute a weighted input file \n" - << " compute-statistics \t compute index statistics " << std::endl; + << " build build a dictionary \n" + << " query query a dictionary \n" + << " check check correctness of a dictionary \n" + << " bench run performance tests for a dictionary \n" + << " permute permute a weighted input file \n" + << std::endl; + return 0; } int main(int argc, char** argv) { if (argc < 2) return help(argv[0]); + print_cmd(argc, argv); auto tool = std::string(argv[1]); if (tool == "build") { - return build(argc - 1, argv + 1); + return build(argc - 1, argv + 1); } else if (tool == "query") { - return query(argc - 1, argv + 1); + return query(argc - 1, argv + 1); } else if (tool == "check") { - return check(argc - 1, argv + 1); + return check(argc - 1, argv + 1); } else if (tool == "bench") { - return bench(argc - 1, argv + 1); + return bench(argc - 1, argv + 1); } else if (tool == "permute") { - return permute(argc - 1, argv + 1); - } else if (tool == "compute-statistics") { - return compute_statistics(argc - 1, argv + 1); + return permute(argc - 1, 
argv + 1); } std::cout << "Unsupported tool '" << tool << "'.\n" << std::endl; return help(argv[0]);