diff --git a/CMakeLists.txt b/CMakeLists.txt index 58e8054..c8bed87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,14 +54,6 @@ MESSAGE(STATUS "Installation prefix: ${CMAKE_INSTALL_PREFIX}") MESSAGE(STATUS "Compiling for processor: ${CMAKE_SYSTEM_PROCESSOR}") MESSAGE(STATUS "Compiling with flags:${CMAKE_CXX_FLAGS}") -include_directories(.) # all include paths relative to parent directory -include_directories(external/pthash/include) -include_directories(external/pthash/external/bits/include) -include_directories(external/pthash/external/fastmod) -include_directories(external/pthash/external/bits/external/essentials/include) -include_directories(external/pthash/external/xxHash) -include_directories(external/pthash/external/mm_file/include) - set(Z_LIB_SOURCES external/gz/zip_stream.cpp ) @@ -74,13 +66,27 @@ set(SSHASH_SOURCES src/statistics.cpp ) +set(SSHASH_INCLUDE_DIRS + external/pthash/include + external/pthash/external/bits/include + external/pthash/external/fastmod + external/pthash/external/bits/external/essentials/include + external/pthash/external/xxHash + external/pthash/external/mm_file/include + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + # Create a static lib add_library(sshash_static STATIC ${Z_LIB_SOURCES} ${SSHASH_SOURCES} ) +target_include_directories(sshash_static PUBLIC ${SSHASH_INCLUDE_DIRS}) + add_executable(sshash tools/sshash.cpp) +target_include_directories(sshash PUBLIC ${SSHASH_INCLUDE_DIRS}) target_link_libraries(sshash z ) diff --git a/include/builder/build_sparse_index.hpp b/include/builder/build_sparse_index.hpp index 5db9084..fe9ccdb 100644 --- a/include/builder/build_sparse_index.hpp +++ b/include/builder/build_sparse_index.hpp @@ -82,9 +82,9 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& } } offsets.push_back(num_super_kmers); - assert(offsets.size() == num_threads + 1); - std::vector threads_buckets_stats(num_threads); + std::vector threads_buckets_stats; + threads_buckets_stats.reserve(num_threads); auto exe = [&](const uint64_t thread_id) { assert(thread_id + 1 < offsets.size()); @@ -115,11 +115,12 @@ buckets_statistics build_sparse_index(parse_data& data, buckets& } }; - std::vector threads(num_threads); - for (uint64_t thread_id = 0; thread_id != num_threads; ++thread_id) { - threads_buckets_stats[thread_id] = - buckets_statistics(num_buckets, num_kmers, num_minimizer_positions); - threads[thread_id] = std::thread(exe, thread_id); + std::vector threads; + threads.reserve(num_threads); + assert(offsets.size() <= num_threads + 1); + for (uint64_t thread_id = 0; thread_id + 1 < size(offsets); ++thread_id) { + threads_buckets_stats.emplace_back(num_buckets, num_kmers, num_minimizer_positions); + threads.emplace_back(exe, thread_id); } for (auto& t : threads) { if (t.joinable()) t.join(); diff --git a/include/builder/parallel_sort.hpp b/include/builder/parallel_sort.hpp index 66668e0..9b6c63c 100644 --- a/include/builder/parallel_sort.hpp +++ b/include/builder/parallel_sort.hpp @@ -55,18 +55,18 @@ void parallel_merge(Iterator A_begin, Iterator A_end, // working memory, which must be of same size as data. */ template -void parallel_sort(std::vector& data, const uint64_t num_threads, Compare comp) // +void parallel_sort(std::vector& data, uint64_t num_threads, Compare comp) // { std::vector temp_data; temp_data.resize(data.size()); + const uint64_t data_size = data.size(); + num_threads = std::min(num_threads, data_size); if (num_threads <= 1) { std::sort(data.begin(), data.end(), comp); return; } - assert((num_threads & (num_threads - 1)) == 0); - const uint64_t data_size = data.size(); const uint64_t chunk_size = (data_size + num_threads - 1) / num_threads; const uint64_t sequential_merge_threshold = data_size / uint64_t(std::log2(num_threads)); assert(sequential_merge_threshold > 0); @@ -75,12 +75,13 @@ void parallel_sort(std::vector& data, const uint64_t num_threads, Compare com threads.reserve(num_threads); using iterator_t = typename std::vector::iterator; - std::vector> ranges(num_threads); + std::vector> ranges; + ranges.reserve(num_threads); - for (uint64_t i = 0; i != num_threads; ++i) { + for (uint64_t i = 0; i * chunk_size < data_size; ++i) { uint64_t begin = i * chunk_size; - uint64_t end = (i == num_threads - 1) ? data_size : begin + chunk_size; - ranges[i] = {data.begin() + begin, data.begin() + end}; + uint64_t end = std::min(data_size, begin + chunk_size); + ranges.emplace_back(data.begin() + begin, data.begin() + end); threads.emplace_back( [&, begin, end]() { std::sort(data.begin() + begin, data.begin() + end, comp); }); } @@ -94,27 +95,29 @@ void parallel_sort(std::vector& data, const uint64_t num_threads, Compare com while (ranges.size() != 1) // { next_ranges.clear(); - for (uint64_t i = 0; i != ranges.size(); i += 2) { + for (uint64_t i = 0; i < ranges.size(); i += 2) { + auto [begin1, end1] = ranges[i]; + + auto input = data.begin(); + auto output = temp_data.begin(); + if (swap) std::swap(input, output); + uint64_t offset = std::distance(input, begin1); + auto output_iterator = output + offset; + assert(offset <= data_size); + uint64_t output_size = end1 - begin1; if (i + 1 < ranges.size()) { - auto [begin1, end1] = ranges[i]; auto [begin2, end2] = ranges[i + 1]; - uint64_t output_size = (end1 - begin1) + (end2 - begin2); - - auto input = data.begin(); - auto output = temp_data.begin(); - if (swap) std::swap(input, output); - uint64_t offset = std::distance(input, begin1); - auto output_iterator = output + offset; - assert(offset <= data_size); + output_size += end2 - begin2; parallel_merge(begin1, end1, begin2, end2, output_iterator, comp, sequential_merge_threshold); assert(std::is_sorted(output_iterator, output_iterator + output_size, comp)); - - auto merged_begin = output_iterator; - auto merged_end = merged_begin + output_size; - next_ranges.push_back({merged_begin, merged_end}); + } else { + std::move(begin1, end1, output_iterator); } + auto merged_begin = output_iterator; + auto merged_end = merged_begin + output_size; + next_ranges.push_back({merged_begin, merged_end}); } ranges.swap(next_ranges); swap = !swap; diff --git a/include/builder/parse_file.hpp b/include/builder/parse_file.hpp index 0a74583..f269788 100644 --- a/include/builder/parse_file.hpp +++ b/include/builder/parse_file.hpp @@ -149,7 +149,7 @@ void parse_file(std::istream& is, parse_data& data, uint64_t i = 0; if constexpr (kmer_t::bits_per_char == 2) { -#if !defined(SSHASH_USE_TRADITIONAL_NUCLEOTIDE_ENCODING) and defined(__x86_64__) +#if !defined(SSHASH_USE_TRADITIONAL_NUCLEOTIDE_ENCODING) and defined(__AVX2__) /* process 32 bytes at a time */ for (; i + 32 <= n; i += 32) { @@ -214,7 +214,7 @@ void parse_file(std::istream& is, parse_data& data, std::vector threads; threads.reserve(num_threads); - for (uint64_t t = 0; t != num_threads; ++t) // + for (uint64_t t = 0; t * num_sequences_per_thread < num_sequences; ++t) // { threads.emplace_back([&, t] { std::vector buffer; @@ -249,7 +249,7 @@ void parse_file(std::istream& is, parse_data& data, minimizer_iterator minimizer_it(k, m, hasher); minimizer_iterator_rc minimizer_it_rc(k, m, hasher); - for (uint64_t i = index_begin; i != index_end; ++i) // + for (uint64_t i = index_begin; i < index_end; ++i) // { const uint64_t begin = data.pieces[i]; const uint64_t end = data.pieces[i + 1]; diff --git a/include/builder/util.hpp b/include/builder/util.hpp index d40d40f..0b9b2cc 100644 --- a/include/builder/util.hpp +++ b/include/builder/util.hpp @@ -3,7 +3,7 @@ #include #include -#if defined(__x86_64__) +#if defined(__AVX2__) #include #include #endif @@ -30,7 +30,7 @@ struct parse_runtime_error : public std::runtime_error { } } -#if defined(__x86_64__) +#if defined(__AVX2__) /* This function takes 32 bytes and packs the two bits in positions 1 and 2 (from right) of each byte into diff --git a/include/dictionary.hpp b/include/dictionary.hpp index b9d0c33..178a2d1 100644 --- a/include/dictionary.hpp +++ b/include/dictionary.hpp @@ -134,6 +134,9 @@ struct dictionary { visit_impl(visitor, *this); } + const buckets& data() const { return m_buckets; } + const minimizers& get_minimizers() const { return m_minimizers; } + private: template static void visit_impl(Visitor& visitor, T&& t) { diff --git a/include/kmer.hpp b/include/kmer.hpp index 25edd42..ea6b061 100644 --- a/include/kmer.hpp +++ b/include/kmer.hpp @@ -15,7 +15,7 @@ namespace sshash { template struct uint_kmer_t { using uint_t = Kmer; - Kmer kmer; + Kmer kmer = 0; uint_kmer_t() {} uint_kmer_t(uint64_t kmer) : kmer(kmer) {} @@ -284,6 +284,19 @@ struct aa_uint_kmer_t : alpha_kmer_t { static bool is_valid(char c) { return ~char_to_aa[static_cast(c)]; } static uint64_t char_to_uint(char c) { return char_to_aa[static_cast(c)]; } + + // For proteins, there's no reverse complement, so map each character to itself + // This allows streaming_query to work with protein alphabets + static constexpr char canonicalize_basepair_reverse_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 0, 0, 0, 0, 0, + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; }; // also supports bitpack<__uint128_t, 1>, std::bitset<256>, etc diff --git a/include/util.hpp b/include/util.hpp index 3cb1247..e83fe8d 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -170,7 +170,7 @@ struct build_configuration { namespace util { -static void check_version_number(essentials::version_number const& vnum) { +static inline void check_version_number(essentials::version_number const& vnum) { if (vnum.x != constants::current_version_number::x) { throw std::runtime_error("MAJOR index version mismatch: SSHash index needs rebuilding"); } diff --git a/src/build.cpp b/src/build.cpp index 759326b..117f2d7 100644 --- a/src/build.cpp +++ b/src/build.cpp @@ -28,9 +28,6 @@ void dictionary::build(std::string const& filename, if (build_config.l >= constants::max_l) { throw std::runtime_error("l must be < " + std::to_string(constants::max_l)); } - if ((build_config.num_threads & (build_config.num_threads - 1)) != 0) { - throw std::runtime_error("number of threads must be a power of 2"); - } m_k = build_config.k; m_m = build_config.m; @@ -126,11 +123,11 @@ void dictionary::build(std::string const& filename, input.read(reinterpret_cast(buffer.data()), buffer.size() * sizeof(minimizer_tuple)); const uint64_t chunk_size = (n + num_threads - 1) / num_threads; - for (uint64_t t = 0; t != num_threads; ++t) { + for (uint64_t t = 0; t * chunk_size < n; ++t) { uint64_t begin = t * chunk_size; - uint64_t end = (t == num_threads - 1) ? n : begin + chunk_size; + uint64_t end = std::min(n, begin + chunk_size); threads.emplace_back([begin, end, &buffer, &f]() { - for (uint64_t i = begin; i != end; ++i) { + for (uint64_t i = begin; i < end; ++i) { buffer[i].minimizer = f.lookup(buffer[i].minimizer); } }); diff --git a/src/info.cpp b/src/info.cpp index 95d73d0..27ccdf9 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -2,10 +2,11 @@ namespace sshash { -static double bits_per_kmer_formula(uint64_t k, /* kmer length */ - uint64_t m, /* minimizer length */ - uint64_t n, /* num. kmers */ - uint64_t M) /* num. strings in SPSS */ +[[maybe_unused]] +static inline double bits_per_kmer_formula(uint64_t k, /* kmer length */ + uint64_t m, /* minimizer length */ + uint64_t n, /* num. kmers */ + uint64_t M) /* num. strings in SPSS */ { /* Caveats: @@ -35,7 +36,7 @@ static double bits_per_kmer_formula(uint64_t k, /* kmer length */ return num_bits / n; } -double perc(uint64_t amount, uint64_t total) { return (amount * 100.0) / total; } +inline double perc(uint64_t amount, uint64_t total) { return (amount * 100.0) / total; } template void dictionary::print_space_breakdown() const { diff --git a/tools/build.cpp b/tools/build.cpp index cb38f14..9c9d738 100644 --- a/tools/build.cpp +++ b/tools/build.cpp @@ -21,7 +21,7 @@ int build(int argc, char** argv) { parser.add("seed", "Seed for construction (default is " + std::to_string(constants::seed) + ").", "-s", false); - parser.add("t", "Number of threads (default is 1). Must be a power of 2.", "-t", false); + parser.add("t", "Number of threads (default is 1).", "-t", false); parser.add("l", "A (integer) constant that controls the space/time trade-off of the dictionary. " "A reasonable values lies in [2.." +