From b42015791672c4ec73e4d19747591f260a8e2772 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:37:38 +0000
Subject: [PATCH 01/15] Initial plan


From 530de74f2787939ff270255b3f75daa87333f1fb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:43:21 +0000
Subject: [PATCH 02/15] Add minimal_perfect_hash policy and test suite

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../policies/minimal_perfect_hash.hpp         | 290 ++++++++++++++++++
 test/test_minimal_perfect_hash.cpp            | 252 +++++++++++++++
 2 files changed, 542 insertions(+)
 create mode 100644 include/boost/openmethod/policies/minimal_perfect_hash.hpp
 create mode 100644 test/test_minimal_perfect_hash.cpp
diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
new file mode 100644
index 00000000..67f93d84
--- /dev/null
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -0,0 +1,290 @@
+// Copyright (c) 2018-2025 Jean-Louis Leroy
+// Distributed under the Boost Software License, Version 1.0.
+// See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
+#define BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
+
+#include <boost/openmethod/preamble.hpp>
+
+#include <limits>
+#include <random>
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4702) // unreachable code
+#endif
+
+namespace boost::openmethod {
+
+namespace detail {
+
+template<class Registry>
+std::vector<type_id> minimal_perfect_hash_control;
+
+} // namespace detail
+
+namespace policies {
+
+//! Hash type ids using a minimal perfect hash function.
+//!
+//! `minimal_perfect_hash` implements the @ref type_hash policy using a hash
+//! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to
+//! determine values for `M` and `N` that result in a minimal perfect hash
+//! function for the set of registered type_ids. This means that the hash
+//! function is collision-free and the codomain is exactly the size of the
+//! domain, resulting in a dense range [0, n-1] for n inputs.
+struct minimal_perfect_hash : type_hash {
+
+    //! Cannot find hash factors
+    struct search_error : openmethod_error {
+        //! Number of attempts to find hash factors
+        std::size_t attempts;
+        //! Number of buckets used in the last attempt
+        std::size_t buckets;
+
+        //! Write a short description to an output stream
+        //! @param os The output stream
+        //! @tparam Registry The registry
+        //! @tparam Stream A @ref LightweightOutputStream
+        template<class Registry, class Stream>
+        auto write(Stream& os) const -> void;
+    };
+
+    using errors = std::variant<search_error>;
+
+    //! A TypeHashFn metafunction.
+    //!
+    //! @tparam Registry The registry containing this policy
+    template<class Registry>
+    class fn {
+        static std::size_t mult;
+        static std::size_t shift;
+        static std::size_t min_value;
+        static std::size_t max_value;
+
+        static void check(std::size_t index, type_id type);
+
+        template<class InitializeContext, class... Options>
+        static void initialize(
+            const InitializeContext& ctx, std::vector<type_id>& buckets,
+            const std::tuple<Options...>& options);
+
+      public:
+        //! Find the hash factors
+        //!
+        //! Attempts to find suitable values for the multiplication factor `M`
+        //! and the shift amount `N` that result in a minimal perfect hash
+        //! function for the specified input values.
+        //!
+        //! If no suitable values are found, calls the error handler with
+        //! a @ref hash_error object then calls `abort`.
+        //!
+        //! @tparam Context An @ref InitializeContext.
+        //! @param ctx A Context object.
+        //! @return A pair containing the minimum and maximum hash values.
+        template<class Context, class... Options>
+        static auto
+        initialize(const Context& ctx, const std::tuple<Options...>& options) {
+            if constexpr (Registry::has_runtime_checks) {
+                initialize(
+                    ctx, detail::minimal_perfect_hash_control<Registry>, options);
+            } else {
+                std::vector<type_id> buckets;
+                initialize(ctx, buckets, options);
+            }
+
+            return std::pair{min_value, max_value};
+        }
+
+        //! Hash a type id
+        //!
+        //! Hash a type id.
+        //!
+        //! If `Registry` contains the @ref runtime_checks policy, checks that
+        //! the type id is valid, i.e. if it was present in the set passed to
+        //! @ref initialize. Its absence indicates that a class involved in a
+        //! method definition, method overrider, or method call was not
+        //! registered. In this case, signal a @ref missing_class using
+        //! the registry's @ref error_handler if present; then calls `abort`.
+        //!
+        //! @param type The type_id to hash
+        //! @return The hash value
+        BOOST_FORCEINLINE
+        static auto hash(type_id type) -> std::size_t {
+            auto index =
+                (mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
+
+            if constexpr (Registry::has_runtime_checks) {
+                check(index, type);
+            }
+
+            return index;
+        }
+
+        //! Releases the memory allocated by `initialize`.
+        //!
+        //! @tparam Options... Zero or more option types, deduced from the function
+        //! arguments.
+        //! @param options Zero or more option objects.
+        template<class... Options>
+        static auto finalize(const std::tuple<Options...>&) -> void {
+            detail::minimal_perfect_hash_control<Registry>.clear();
+        }
+    };
+};
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::mult;
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::shift;
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::min_value;
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::max_value;
+
+template<class Registry>
+template<class InitializeContext, class... Options>
+void minimal_perfect_hash::fn<Registry>::initialize(
+    const InitializeContext& ctx, std::vector<type_id>& buckets,
+    const std::tuple<Options...>& options) {
+    (void)options;
+
+    const auto N = std::distance(ctx.classes_begin(), ctx.classes_end());
+
+    if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) {
+        Registry::output::os << "Finding minimal perfect hash factors for " << N << " types\n";
+    }
+
+    // For minimal perfect hash, we need exactly N buckets
+    std::size_t hash_size = N;
+    
+    if (hash_size == 0) {
+        min_value = 0;
+        max_value = 0;
+        shift = 0;
+        mult = 1;
+        return;
+    }
+
+    std::default_random_engine rnd(13081963);
+    std::size_t total_attempts = 0;
+    
+    // Calculate M (number of bits needed to represent hash_size)
+    std::size_t M = 0;
+    for (auto size = hash_size; size > 0; size >>= 1) {
+        ++M;
+    }
+    if (M > 0) {
+        M--;
+    }
+
+    std::uniform_int_distribution<std::size_t> uniform_dist;
+
+    // Try increasing values of M for better distribution
+    for (std::size_t pass = 0; pass < 4; ++pass, ++M) {
+        shift = 8 * sizeof(type_id) - M;
+        min_value = (std::numeric_limits<std::size_t>::max)();
+        max_value = (std::numeric_limits<std::size_t>::min)();
+
+        if constexpr (InitializeContext::template has_option<trace>) {
+            ctx.tr << "  trying with M = " << M << ", " << hash_size
+                   << " buckets (minimal)\n";
+        }
+
+        std::size_t attempts = 0;
+        buckets.resize(hash_size);
+
+        while (attempts < 100000) {
+            std::fill(
+                buckets.begin(), buckets.end(), type_id(detail::uintptr_max));
+            ++attempts;
+            ++total_attempts;
+            mult = uniform_dist(rnd) | 1;
+
+            bool collision_found = false;
+            for (auto iter = ctx.classes_begin(); iter != ctx.classes_end();
+                 ++iter) {
+                for (auto type_iter = iter->type_id_begin();
+                     type_iter != iter->type_id_end(); ++type_iter) {
+                    auto type = *type_iter;
+                    auto index = (detail::uintptr(type) * mult) >> shift;
+                    
+                    // For minimal perfect hash, index must be in [0, N)
+                    if (index >= hash_size) {
+                        collision_found = true;
+                        goto collision;
+                    }
+                    
+                    min_value = (std::min)(min_value, index);
+                    max_value = (std::max)(max_value, index);
+
+                    if (detail::uintptr(buckets[index]) !=
+                        detail::uintptr_max) {
+                        collision_found = true;
+                        goto collision;
+                    }
+
+                    buckets[index] = type;
+                }
+            }
+
+            // Verify that we have a minimal perfect hash (all buckets used)
+            for (std::size_t i = 0; i < hash_size; ++i) {
+                if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
+                    collision_found = true;
+                    goto collision;
+                }
+            }
+
+            if constexpr (InitializeContext::template has_option<trace>) {
+                ctx.tr << "  found " << mult << " after " << total_attempts
+                       << " attempts; span = [" << min_value << ", "
+                       << max_value << "], size = " << (max_value - min_value + 1) << "\n";
+            }
+
+            return;
+
+        collision: {}
+        }
+    }
+
+    search_error error;
+    error.attempts = total_attempts;
+    error.buckets = hash_size;
+
+    if constexpr (Registry::has_error_handler) {
+        Registry::error_handler::error(error);
+    }
+
+    abort();
+}
+
+template<class Registry>
+void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) {
+    if (index < min_value || index > max_value ||
+        detail::minimal_perfect_hash_control<Registry>[index] != type) {
+
+        if constexpr (Registry::has_error_handler) {
+            missing_class error;
+            error.type = type;
+            Registry::error_handler::error(error);
+        }
+
+        abort();
+    }
+}
+
+template<class Registry, class Stream>
+auto minimal_perfect_hash::search_error::write(Stream& os) const -> void {
+    os << "could not find minimal perfect hash factors after " << attempts 
+       << " attempts using " << buckets << " buckets\n";
+}
+
+} // namespace policies
+} // namespace boost::openmethod
+
+#endif
diff --git a/test/test_minimal_perfect_hash.cpp b/test/test_minimal_perfect_hash.cpp
new file mode 100644
index 00000000..13c0b407
--- /dev/null
+++ b/test/test_minimal_perfect_hash.cpp
@@ -0,0 +1,252 @@
+// Copyright (c) 2018-2025 Jean-Louis Leroy
+// Distributed under the Boost Software License, Version 1.0.
+// See accompanying file LICENSE_1_0.txt
+// or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <string>
+#include <set>
+
+#define BOOST_TEST_MODULE minimal_perfect_hash
+#include <boost/test/unit_test.hpp>
+
+#include <boost/openmethod.hpp>
+#include <boost/openmethod/policies/minimal_perfect_hash.hpp>
+#include <boost/openmethod/policies/std_rtti.hpp>
+#include <boost/openmethod/policies/vptr_vector.hpp>
+#include <boost/openmethod/policies/stderr_output.hpp>
+#include <boost/openmethod/policies/default_error_handler.hpp>
+#include <boost/openmethod/initialize.hpp>
+
+#include "test_util.hpp"
+
+using namespace boost::openmethod;
+using namespace boost::openmethod::policies;
+
+// Test registry with minimal_perfect_hash
+struct minimal_hash_registry
+    : registry<
+          std_rtti, vptr_vector, minimal_perfect_hash,
+          default_error_handler, stderr_output> {
+};
+
+// Test registry with runtime checks
+struct minimal_hash_registry_with_checks
+    : registry<
+          std_rtti, vptr_vector, minimal_perfect_hash,
+          default_error_handler, stderr_output, runtime_checks> {
+};
+
+namespace test_basic {
+
+struct Animal {
+    virtual ~Animal() {}
+};
+
+struct Dog : Animal {};
+struct Cat : Animal {};
+struct Bird : Animal {};
+
+BOOST_OPENMETHOD_CLASSES(Animal, Dog, Cat, Bird, minimal_hash_registry);
+
+BOOST_OPENMETHOD(get_sound, (virtual_<const Animal&>), std::string, minimal_hash_registry);
+
+BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Dog&), std::string) {
+    return "woof";
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Cat&), std::string) {
+    return "meow";
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_sound, (const Bird&), std::string) {
+    return "chirp";
+}
+
+BOOST_AUTO_TEST_CASE(basic_functionality) {
+    initialize<minimal_hash_registry>();
+
+    Dog dog;
+    Cat cat;
+    Bird bird;
+
+    BOOST_TEST(get_sound(dog) == "woof");
+    BOOST_TEST(get_sound(cat) == "meow");
+    BOOST_TEST(get_sound(bird) == "chirp");
+}
+
+} // namespace test_basic
+
+namespace test_hash_properties {
+
+struct Base {
+    virtual ~Base() {}
+};
+
+struct D1 : Base {};
+struct D2 : Base {};
+struct D3 : Base {};
+struct D4 : Base {};
+struct D5 : Base {};
+
+BOOST_OPENMETHOD_CLASSES(Base, D1, D2, D3, D4, D5, minimal_hash_registry);
+
+BOOST_OPENMETHOD(get_id, (virtual_<const Base&>), int, minimal_hash_registry);
+
+BOOST_OPENMETHOD_OVERRIDE(get_id, (const D1&), int) {
+    return 1;
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_id, (const D2&), int) {
+    return 2;
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_id, (const D3&), int) {
+    return 3;
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_id, (const D4&), int) {
+    return 4;
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_id, (const D5&), int) {
+    return 5;
+}
+
+BOOST_AUTO_TEST_CASE(minimal_hash_properties) {
+    initialize<minimal_hash_registry>();
+
+    // One object per derived class; each call must reach that class's overrider
+    D1 d1;
+    D2 d2;
+    D3 d3;
+    D4 d4;
+    D5 d5;
+
+    BOOST_TEST(get_id(d1) == 1);
+    BOOST_TEST(get_id(d2) == 2);
+    BOOST_TEST(get_id(d3) == 3);
+    BOOST_TEST(get_id(d4) == 4);
+    BOOST_TEST(get_id(d5) == 5);
+    
+    // NOTE: nothing here asserts density or minimality of the hash directly; the
+    // checks above only show that dispatch through the registry picks the right overrider.
+}
+
+} // namespace test_hash_properties
+
+namespace test_with_runtime_checks {
+
+struct Vehicle {
+    virtual ~Vehicle() {}
+};
+
+struct Car : Vehicle {};
+struct Bike : Vehicle {};
+
+BOOST_OPENMETHOD_CLASSES(Vehicle, Car, Bike, minimal_hash_registry_with_checks);
+
+BOOST_OPENMETHOD(get_wheels, (virtual_<const Vehicle&>), int, minimal_hash_registry_with_checks);
+
+BOOST_OPENMETHOD_OVERRIDE(get_wheels, (const Car&), int) {
+    return 4;
+}
+
+BOOST_OPENMETHOD_OVERRIDE(get_wheels, (const Bike&), int) {
+    return 2;
+}
+
+BOOST_AUTO_TEST_CASE(runtime_checks) {
+    initialize<minimal_hash_registry_with_checks>();
+
+    Car car;
+    Bike bike;
+
+    BOOST_TEST(get_wheels(car) == 4);
+    BOOST_TEST(get_wheels(bike) == 2);
+}
+
+} // namespace test_with_runtime_checks
+
+namespace test_empty {
+
+struct Empty {
+    virtual ~Empty() {}
+};
+
+BOOST_OPENMETHOD_CLASSES(Empty, minimal_hash_registry);
+
+BOOST_OPENMETHOD(process, (virtual_<const Empty&>), int, minimal_hash_registry);
+
+BOOST_OPENMETHOD_OVERRIDE(process, (const Empty&), int) {
+    return 42;
+}
+
+BOOST_AUTO_TEST_CASE(single_class) {
+    initialize<minimal_hash_registry>();
+
+    Empty e;
+    BOOST_TEST(process(e) == 42);
+}
+
+} // namespace test_empty
+
+namespace test_large_hierarchy {
+
+struct Root {
+    virtual ~Root() {}
+};
+
+struct L1_1 : Root {};
+struct L1_2 : Root {};
+struct L1_3 : Root {};
+struct L1_4 : Root {};
+struct L1_5 : Root {};
+struct L1_6 : Root {};
+struct L1_7 : Root {};
+struct L1_8 : Root {};
+struct L1_9 : Root {};
+struct L1_10 : Root {};
+
+BOOST_OPENMETHOD_CLASSES(Root, L1_1, L1_2, L1_3, L1_4, L1_5, L1_6, L1_7, L1_8, L1_9, L1_10, minimal_hash_registry);
+
+BOOST_OPENMETHOD(classify, (virtual_<const Root&>), int, minimal_hash_registry);
+
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_1&), int) { return 1; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_2&), int) { return 2; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_3&), int) { return 3; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_4&), int) { return 4; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_5&), int) { return 5; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_6&), int) { return 6; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_7&), int) { return 7; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_8&), int) { return 8; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_9&), int) { return 9; }
+BOOST_OPENMETHOD_OVERRIDE(classify, (const L1_10&), int) { return 10; }
+
+BOOST_AUTO_TEST_CASE(larger_hierarchy) {
+    initialize<minimal_hash_registry>();
+
+    L1_1 o1;
+    L1_2 o2;
+    L1_3 o3;
+    L1_4 o4;
+    L1_5 o5;
+    L1_6 o6;
+    L1_7 o7;
+    L1_8 o8;
+    L1_9 o9;
+    L1_10 o10;
+
+    BOOST_TEST(classify(o1) == 1);
+    BOOST_TEST(classify(o2) == 2);
+    BOOST_TEST(classify(o3) == 3);
+    BOOST_TEST(classify(o4) == 4);
+    BOOST_TEST(classify(o5) == 5);
+    BOOST_TEST(classify(o6) == 6);
+    BOOST_TEST(classify(o7) == 7);
+    BOOST_TEST(classify(o8) == 8);
+    BOOST_TEST(classify(o9) == 9);
+    BOOST_TEST(classify(o10) == 10);
+}
+
+} // namespace test_large_hierarchy

From 63d23e79a43a23c50d87a5a58e995a12d20232c7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:47:10 +0000
Subject: [PATCH 03/15] Fix M calculation for minimal perfect hash to ensure
 enough bits

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../policies/minimal_perfect_hash.hpp         | 26 +++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 67f93d84..456a9e3d 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -169,18 +169,34 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         mult = 1;
         return;
     }
+    
+    if (hash_size == 1) {
+        // Special case: only one type, any hash function works
+        min_value = 0;
+        max_value = 0;
+        shift = 8 * sizeof(type_id); // Shift everything away, result is always 0
+        mult = 1;
+        buckets.resize(1);
+        for (auto iter = ctx.classes_begin(); iter != ctx.classes_end(); ++iter) {
+            for (auto type_iter = iter->type_id_begin();
+                 type_iter != iter->type_id_end(); ++type_iter) {
+                buckets[0] = *type_iter;
+            }
+        }
+        return;
+    }
 
     std::default_random_engine rnd(13081963);
     std::size_t total_attempts = 0;
     
-    // Calculate M (number of bits needed to represent hash_size)
+    // Calculate M (number of bits needed for the hash range)
+    // We need 2^M >= hash_size, so M = ceil(log2(hash_size))
     std::size_t M = 0;
-    for (auto size = hash_size; size > 0; size >>= 1) {
+    std::size_t power = 1;
+    while (power < hash_size) {
+        power <<= 1;
         ++M;
     }
-    if (M > 0) {
-        M--;
-    }
 
     std::uniform_int_distribution<std::size_t> uniform_dist;
 

From dc906d8a1f9bfdcf89ec1c75ed591b310f1c4f9b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:48:02 +0000
Subject: [PATCH 04/15] Add comments clarifying minimal perfect hash property

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../boost/openmethod/policies/minimal_perfect_hash.hpp   | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 456a9e3d..8ee2e419 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -34,6 +34,12 @@ namespace policies {
 //! function for the set of registered type_ids. This means that the hash
 //! function is collision-free and the codomain is exactly the size of the
 //! domain, resulting in a dense range [0, n-1] for n inputs.
+//!
+//! Unlike @ref fast_perfect_hash, which uses a hash table of size 2^k
+//! (typically larger than needed) and may have unused slots, this policy
+//! ensures the hash table has exactly n slots for n type_ids, with all
+//! slots filled. This minimizes memory usage but may require more search
+//! attempts during initialization.
 struct minimal_perfect_hash : type_hash {
 
     //! Cannot find hash factors
@@ -249,6 +255,9 @@ void minimal_perfect_hash::fn<Registry>::initialize(
             }
 
             // Verify that we have a minimal perfect hash (all buckets used)
+            // This is the key difference from fast_perfect_hash: we require that
+            // all N buckets are filled, ensuring the codomain size equals the
+            // domain size, making it a truly minimal perfect hash function.
             for (std::size_t i = 0; i < hash_size; ++i) {
                 if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
                     collision_found = true;

From 88984a1d14644f53f76c9551b062047721af6d96 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:52:59 +0000
Subject: [PATCH 05/15] Implement proper PtHash algorithm with displacement
 tables

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../policies/minimal_perfect_hash.hpp         | 252 ++++++++++++------
 1 file changed, 164 insertions(+), 88 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 8ee2e419..f049ab25 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -10,6 +10,8 @@
 
 #include <limits>
 #include <random>
+#include <vector>
+#include <algorithm>
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 4702) // unreachable code
@@ -19,9 +21,23 @@ namespace boost::openmethod {
 
 namespace detail {
 
+#if defined(UINTPTR_MAX)
+using uintptr = std::uintptr_t;
+constexpr uintptr uintptr_max = UINTPTR_MAX;
+#else
+static_assert(
+    sizeof(std::size_t) == sizeof(void*),
+    "This implementation requires that size_t and void* have the same size.");
+using uintptr = std::size_t;
+constexpr uintptr uintptr_max = (std::numeric_limits<std::size_t>::max)();
+#endif
+
 template<class Registry>
 std::vector<type_id> minimal_perfect_hash_control;
 
+template<class Registry>
+std::vector<std::size_t> minimal_perfect_hash_displacements;
+
 } // namespace detail
 
 namespace policies {
@@ -66,8 +82,10 @@ struct minimal_perfect_hash : type_hash {
     class fn {
         static std::size_t mult;
         static std::size_t shift;
-        static std::size_t min_value;
-        static std::size_t max_value;
+        static std::size_t table_size;  // N for minimal perfect hash
+        static std::size_t num_groups;
+        static std::size_t group_mult;
+        static std::size_t group_shift;
 
         static void check(std::size_t index, type_id type);
 
@@ -77,18 +95,19 @@ struct minimal_perfect_hash : type_hash {
             const std::tuple<Options...>& options);
 
       public:
-        //! Find the hash factors
+        //! Find the hash factors using PtHash algorithm
         //!
-        //! Attempts to find suitable values for the multiplication factor `M`
-        //! and the shift amount `N` that result in a minimal perfect hash
-        //! function for the specified input values.
+        //! Uses the PtHash algorithm to find:
+        //! - Pilot hash parameters (M, N) for H(x) = (M * x) >> N
+        //! - Bucket assignment parameters 
+        //! - Displacement values for each bucket to achieve minimal perfect hashing
         //!
         //! If no suitable values are found, calls the error handler with
-        //! a @ref hash_error object then calls `abort`.
+        //! a @ref search_error object then calls `abort`.
         //!
         //! @tparam Context An @ref InitializeContext.
         //! @param ctx A Context object.
-        //! @return A pair containing the minimum and maximum hash values.
+        //! @return A pair containing the minimum (0) and maximum (n-1) hash values.
         template<class Context, class... Options>
         static auto
         initialize(const Context& ctx, const std::tuple<Options...>& options) {
@@ -100,12 +119,13 @@ struct minimal_perfect_hash : type_hash {
                 initialize(ctx, buckets, options);
             }
 
-            return std::pair{min_value, max_value};
+            return std::pair{std::size_t(0), table_size ? table_size - 1 : std::size_t(0)}; // empty registry: no 0 - 1 wrap
         }
 
-        //! Hash a type id
+        //! Hash a type id using the PtHash algorithm
         //!
-        //! Hash a type id.
+        //! Hash a type id using H(x) = (pilot(x) + disp[group(x)]) % N
+        //! where pilot(x) = (M * x) >> S and group(x) = (GM * x) >> GS.
         //!
         //! If `Registry` contains the @ref runtime_checks policy, checks that
         //! the type id is valid, i.e. if it was present in the set passed to
@@ -118,8 +138,9 @@ struct minimal_perfect_hash : type_hash {
         //! @return The hash value
         BOOST_FORCEINLINE
         static auto hash(type_id type) -> std::size_t {
-            auto index =
-                (mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
+            auto pilot = (mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
+            auto group = (group_mult * reinterpret_cast<detail::uintptr>(type)) >> group_shift;
+            auto index = (pilot + detail::minimal_perfect_hash_displacements<Registry>[group]) % table_size;
 
             if constexpr (Registry::has_runtime_checks) {
                 check(index, type);
@@ -136,6 +157,7 @@ struct minimal_perfect_hash : type_hash {
         template<class... Options>
         static auto finalize(const std::tuple<Options...>&) -> void {
             detail::minimal_perfect_hash_control<Registry>.clear();
+            detail::minimal_perfect_hash_displacements<Registry>.clear();
         }
     };
 };
@@ -147,10 +169,16 @@ template<class Registry>
 std::size_t minimal_perfect_hash::fn<Registry>::shift;
 
 template<class Registry>
-std::size_t minimal_perfect_hash::fn<Registry>::min_value;
+std::size_t minimal_perfect_hash::fn<Registry>::table_size;
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::num_groups;
+
+template<class Registry>
+std::size_t minimal_perfect_hash::fn<Registry>::group_mult;
 
 template<class Registry>
-std::size_t minimal_perfect_hash::fn<Registry>::max_value;
+std::size_t minimal_perfect_hash::fn<Registry>::group_shift;
 
 template<class Registry>
 template<class InitializeContext, class... Options>
@@ -162,26 +190,30 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     const auto N = std::distance(ctx.classes_begin(), ctx.classes_end());
 
     if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) {
-        Registry::output::os << "Finding minimal perfect hash factors for " << N << " types\n";
+        Registry::output::os << "Finding minimal perfect hash using PtHash for " << N << " types\n";
     }
 
-    // For minimal perfect hash, we need exactly N buckets
-    std::size_t hash_size = N;
+    // Table size is exactly N for minimal perfect hash
+    table_size = N;
     
-    if (hash_size == 0) {
-        min_value = 0;
-        max_value = 0;
+    if (table_size == 0) {
         shift = 0;
         mult = 1;
+        num_groups = 0;
+        group_mult = 1;
+        group_shift = 0;
+        detail::minimal_perfect_hash_displacements<Registry>.clear();
         return;
     }
     
-    if (hash_size == 1) {
-        // Special case: only one type, any hash function works
-        min_value = 0;
-        max_value = 0;
-        shift = 8 * sizeof(type_id); // Shift everything away, result is always 0
+    if (table_size == 1) {
+        // Special case: one type. A shift by the full word width is undefined, so stop one bit short.
+        shift = 8 * sizeof(type_id) - 1; // pilot is 0 or 1; hash() reduces it modulo table_size (1)
         mult = 1;
+        num_groups = 1;
+        group_mult = 0; // group(x) is 0 for every x, the only displacement slot
+        group_shift = 0;
+        detail::minimal_perfect_hash_displacements<Registry>.assign(1, 0);
         buckets.resize(1);
         for (auto iter = ctx.classes_begin(); iter != ctx.classes_end(); ++iter) {
             for (auto type_iter = iter->type_id_begin();
@@ -192,94 +224,138 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         return;
     }
 
+    // Collect all type_ids
+    std::vector<type_id> keys;
+    for (auto iter = ctx.classes_begin(); iter != ctx.classes_end(); ++iter) {
+        for (auto type_iter = iter->type_id_begin();
+             type_iter != iter->type_id_end(); ++type_iter) {
+            keys.push_back(*type_iter);
+        }
+    }
+
     std::default_random_engine rnd(13081963);
+    std::uniform_int_distribution<std::size_t> uniform_dist;
     std::size_t total_attempts = 0;
+
+    // PtHash algorithm: partition keys into groups, then find displacements
+    // Number of groups: typically sqrt(N) to N/4 for good performance
+    num_groups = (std::max)(std::size_t(1), table_size / 4);
+    if (num_groups > table_size) num_groups = table_size;
     
-    // Calculate M (number of bits needed for the hash range)
-    // We need 2^M >= hash_size, so M = ceil(log2(hash_size))
-    std::size_t M = 0;
+    // Calculate bits needed for num_groups
+    std::size_t GM = 0;
     std::size_t power = 1;
-    while (power < hash_size) {
+    while (power < num_groups) {
         power <<= 1;
-        ++M;
+        ++GM;
     }
+    group_shift = 8 * sizeof(type_id) - GM;
 
-    std::uniform_int_distribution<std::size_t> uniform_dist;
+    if constexpr (InitializeContext::template has_option<trace>) {
+        ctx.tr << "  Using " << num_groups << " groups for " << table_size << " keys\n";
+    }
 
-    // Try increasing values of M for better distribution
-    for (std::size_t pass = 0; pass < 4; ++pass, ++M) {
+    // Try different pilot hash parameters
+    for (std::size_t pass = 0; pass < 10 && total_attempts < 100000; ++pass) {
+        mult = uniform_dist(rnd) | 1;
+        group_mult = uniform_dist(rnd) | 1;
+        
+        // Calculate M for pilot hash (number of bits for table_size range)
+        std::size_t M = 0;
+        power = 1;
+        while (power < table_size * 2) {  // Use 2*N for better distribution
+            power <<= 1;
+            ++M;
+        }
         shift = 8 * sizeof(type_id) - M;
-        min_value = (std::numeric_limits<std::size_t>::max)();
-        max_value = (std::numeric_limits<std::size_t>::min)();
 
-        if constexpr (InitializeContext::template has_option<trace>) {
-            ctx.tr << "  trying with M = " << M << ", " << hash_size
-                   << " buckets (minimal)\n";
+        // Partition keys into groups
+        std::vector<std::vector<type_id>> groups(num_groups);
+        for (auto key : keys) {
+            auto group_idx = ((group_mult * reinterpret_cast<detail::uintptr>(key)) >> group_shift) % num_groups;
+            groups[group_idx].push_back(key);
         }
 
-        std::size_t attempts = 0;
-        buckets.resize(hash_size);
-
-        while (attempts < 100000) {
-            std::fill(
-                buckets.begin(), buckets.end(), type_id(detail::uintptr_max));
-            ++attempts;
-            ++total_attempts;
-            mult = uniform_dist(rnd) | 1;
-
-            bool collision_found = false;
-            for (auto iter = ctx.classes_begin(); iter != ctx.classes_end();
-                 ++iter) {
-                for (auto type_iter = iter->type_id_begin();
-                     type_iter != iter->type_id_end(); ++type_iter) {
-                    auto type = *type_iter;
-                    auto index = (detail::uintptr(type) * mult) >> shift;
-                    
-                    // For minimal perfect hash, index must be in [0, N)
-                    if (index >= hash_size) {
-                        collision_found = true;
-                        goto collision;
-                    }
-                    
-                    min_value = (std::min)(min_value, index);
-                    max_value = (std::max)(max_value, index);
-
-                    if (detail::uintptr(buckets[index]) !=
-                        detail::uintptr_max) {
-                        collision_found = true;
-                        goto collision;
+        // Try to find displacements for each group
+        detail::minimal_perfect_hash_displacements<Registry>.assign(num_groups, 0);
+        buckets.assign(table_size, type_id(detail::uintptr_max));
+        std::vector<bool> used(table_size, false);
+        bool success = true;
+
+        // Process groups in descending order of size (larger groups first)
+        std::vector<std::size_t> group_order(num_groups);
+        for (std::size_t i = 0; i < num_groups; ++i) group_order[i] = i;
+        std::sort(group_order.begin(), group_order.end(), 
+                  [&groups](std::size_t a, std::size_t b) {
+                      return groups[a].size() > groups[b].size();
+                  });
+
+        for (auto g : group_order) {
+            if (groups[g].empty()) continue;
+
+            // Try different displacement values
+            bool found = false;
+            for (std::size_t disp = 0; disp < table_size * 2 && !found; ++disp) {
+                ++total_attempts;
+                if (total_attempts > 100000) {
+                    success = false;
+                    break;
+                }
+
+                // Check if this displacement works for all keys in group
+                std::vector<std::size_t> positions;
+                bool valid = true;
+                for (auto key : groups[g]) {
+                    auto pilot = (mult * reinterpret_cast<detail::uintptr>(key)) >> shift;
+                    auto pos = (pilot + disp) % table_size;
+                    if (used[pos]) {
+                        valid = false;
+                        break;
                     }
+                    positions.push_back(pos);
+                }
 
-                    buckets[index] = type;
+                if (valid) {
+                    // Mark positions as used and store keys
+                    detail::minimal_perfect_hash_displacements<Registry>[g] = disp;
+                    for (std::size_t i = 0; i < groups[g].size(); ++i) {
+                        used[positions[i]] = true;
+                        buckets[positions[i]] = groups[g][i];
+                    }
+                    found = true;
                 }
             }
 
-            // Verify that we have a minimal perfect hash (all buckets used)
-            // This is the key difference from fast_perfect_hash: we require that
-            // all N buckets are filled, ensuring the codomain size equals the
-            // domain size, making it a truly minimal perfect hash function.
-            for (std::size_t i = 0; i < hash_size; ++i) {
+            if (!found) {
+                success = false;
+                break;
+            }
+        }
+
+        if (success) {
+            // Verify all positions are used (minimal property)
+            bool all_used = true;
+            for (std::size_t i = 0; i < table_size; ++i) {
                 if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
-                    collision_found = true;
-                    goto collision;
+                    all_used = false;
+                    break;
                 }
             }
 
-            if constexpr (InitializeContext::template has_option<trace>) {
-                ctx.tr << "  found " << mult << " after " << total_attempts
-                       << " attempts; span = [" << min_value << ", "
-                       << max_value << "], size = " << (max_value - min_value + 1) << "\n";
+            if (all_used) {
+                if constexpr (InitializeContext::template has_option<trace>) {
+                    ctx.tr << "  Found minimal perfect hash after " << total_attempts
+                           << " attempts\n";
+                }
+                return;
             }
-
-            return;
-
-        collision: {}
         }
     }
 
+    // Failed to find minimal perfect hash
     search_error error;
     error.attempts = total_attempts;
-    error.buckets = hash_size;
+    error.buckets = table_size;
 
     if constexpr (Registry::has_error_handler) {
         Registry::error_handler::error(error);
@@ -290,7 +366,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
 
 template<class Registry>
 void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) {
-    if (index < min_value || index > max_value ||
+    if (index >= table_size ||
         detail::minimal_perfect_hash_control<Registry>[index] != type) {
 
         if constexpr (Registry::has_error_handler) {

From fe45caf13cef39e5e539dedd43f357c36e038b4c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:55:03 +0000
Subject: [PATCH 06/15] Address code review feedback: add named constants and
 fix trace checking

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../policies/minimal_perfect_hash.hpp         | 33 ++++++++++++-------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index f049ab25..b9e93a09 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -189,8 +189,8 @@ void minimal_perfect_hash::fn<Registry>::initialize(
 
     const auto N = std::distance(ctx.classes_begin(), ctx.classes_end());
 
-    if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) {
-        Registry::output::os << "Finding minimal perfect hash using PtHash for " << N << " types\n";
+    if constexpr (InitializeContext::template has_option<trace>) {
+        ctx.tr << "Finding minimal perfect hash using PtHash for " << N << " types\n";
     }
 
     // Table size is exactly N for minimal perfect hash
@@ -208,11 +208,12 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     
     if (table_size == 1) {
         // Special case: only one type
-        shift = 8 * sizeof(type_id);
+        constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
+        shift = bits_per_type_id;
         mult = 1;
         num_groups = 1;
         group_mult = 1;
-        group_shift = 8 * sizeof(type_id);
+        group_shift = bits_per_type_id;
         detail::minimal_perfect_hash_displacements<Registry>.assign(1, 0);
         buckets.resize(1);
         for (auto iter = ctx.classes_begin(); iter != ctx.classes_end(); ++iter) {
@@ -233,13 +234,21 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         }
     }
 
-    std::default_random_engine rnd(13081963);
+    // Constants for PtHash algorithm
+    constexpr std::size_t DEFAULT_RANDOM_SEED = 13081963; // Same seed as fast_perfect_hash
+    constexpr std::size_t MAX_PASSES = 10;
+    constexpr std::size_t MAX_ATTEMPTS = 100000;
+    constexpr std::size_t DEFAULT_GROUP_DIVISOR = 4;  // N/4 groups for balance between memory and speed
+    constexpr std::size_t DISTRIBUTION_FACTOR = 2;     // 2*N range for better distribution
+    constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
+
+    std::default_random_engine rnd(DEFAULT_RANDOM_SEED);
     std::uniform_int_distribution<std::size_t> uniform_dist;
     std::size_t total_attempts = 0;
 
     // PtHash algorithm: partition keys into groups, then find displacements
     // Number of groups: typically sqrt(N) to N/4 for good performance
-    num_groups = (std::max)(std::size_t(1), table_size / 4);
+    num_groups = (std::max)(std::size_t(1), table_size / DEFAULT_GROUP_DIVISOR);
     if (num_groups > table_size) num_groups = table_size;
     
     // Calculate bits needed for num_groups
@@ -249,25 +258,25 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         power <<= 1;
         ++GM;
     }
-    group_shift = 8 * sizeof(type_id) - GM;
+    group_shift = bits_per_type_id - GM;
 
     if constexpr (InitializeContext::template has_option<trace>) {
         ctx.tr << "  Using " << num_groups << " groups for " << table_size << " keys\n";
     }
 
     // Try different pilot hash parameters
-    for (std::size_t pass = 0; pass < 10 && total_attempts < 100000; ++pass) {
+    for (std::size_t pass = 0; pass < MAX_PASSES && total_attempts < MAX_ATTEMPTS; ++pass) {
         mult = uniform_dist(rnd) | 1;
         group_mult = uniform_dist(rnd) | 1;
         
         // Calculate M for pilot hash (number of bits for table_size range)
         std::size_t M = 0;
         power = 1;
-        while (power < table_size * 2) {  // Use 2*N for better distribution
+        while (power < table_size * DISTRIBUTION_FACTOR) {
             power <<= 1;
             ++M;
         }
-        shift = 8 * sizeof(type_id) - M;
+        shift = bits_per_type_id - M;
 
         // Partition keys into groups
         std::vector<std::vector<type_id>> groups(num_groups);
@@ -295,9 +304,9 @@ void minimal_perfect_hash::fn<Registry>::initialize(
 
             // Try different displacement values
             bool found = false;
-            for (std::size_t disp = 0; disp < table_size * 2 && !found; ++disp) {
+            for (std::size_t disp = 0; disp < table_size * DISTRIBUTION_FACTOR && !found; ++disp) {
                 ++total_attempts;
-                if (total_attempts > 100000) {
+                if (total_attempts > MAX_ATTEMPTS) {
                     success = false;
                     break;
                 }

From b4866427482cee9837030179af07fd76e31d5f48 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 04:56:37 +0000
Subject: [PATCH 07/15] Optimize positions vector allocation with reserve()

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 _codeql_detected_source_root                               | 1 +
 include/boost/openmethod/policies/minimal_perfect_hash.hpp | 1 +
 2 files changed, 2 insertions(+)
 create mode 120000 _codeql_detected_source_root

diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root
new file mode 120000
index 00000000..945c9b46
--- /dev/null
+++ b/_codeql_detected_source_root
@@ -0,0 +1 @@
+.
\ No newline at end of file
diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index b9e93a09..7a693ca6 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -313,6 +313,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
 
                 // Check if this displacement works for all keys in group
                 std::vector<std::size_t> positions;
+                positions.reserve(groups[g].size());
                 bool valid = true;
                 for (auto key : groups[g]) {
                     auto pilot = (mult * reinterpret_cast<detail::uintptr>(key)) >> shift;

From 7e44f683ea4ea7d5566da0c157dd39ee2c82d59d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:33:17 +0000
Subject: [PATCH 08/15] Allow up to 10% bucket waste in minimal_perfect_hash

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../policies/minimal_perfect_hash.hpp         | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 7a693ca6..c32af666 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -48,14 +48,14 @@ namespace policies {
 //! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to
 //! determine values for `M` and `N` that result in a minimal perfect hash
 //! function for the set of registered type_ids. This means that the hash
-//! function is collision-free and the codomain is exactly the size of the
-//! domain, resulting in a dense range [0, n-1] for n inputs.
+//! function is collision-free and the codomain is approximately the size of
+//! the domain, resulting in a dense range [0, n-1] for n inputs.
 //!
 //! Unlike @ref fast_perfect_hash, which uses a hash table of size 2^k
 //! (typically larger than needed) and may have unused slots, this policy
-//! ensures the hash table has exactly n slots for n type_ids, with all
-//! slots filled. This minimizes memory usage but may require more search
-//! attempts during initialization.
+//! uses approximately 1.1*n slots for n type_ids (allowing up to 10% waste).
+//! This keeps memory usage close to the minimum while making a suitable hash
+//! function easier to find during initialization.
 struct minimal_perfect_hash : type_hash {
 
     //! Cannot find hash factors
@@ -193,8 +193,11 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         ctx.tr << "Finding minimal perfect hash using PtHash for " << N << " types\n";
     }
 
-    // Table size is exactly N for minimal perfect hash
-    table_size = N;
+    // Table size is N * 1.1 to allow up to 10% waste (makes finding hash easier)
+    table_size = N + N / 10;
+    if (table_size == N && N > 0) {
+        table_size = N + 1;  // Ensure at least 1 extra slot for N > 0
+    }
     
     if (table_size == 0) {
         shift = 0;
@@ -241,6 +244,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     constexpr std::size_t DEFAULT_GROUP_DIVISOR = 4;  // N/4 groups for balance between memory and speed
     constexpr std::size_t DISTRIBUTION_FACTOR = 2;     // 2*N range for better distribution
     constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
+    // Allow 10% waste to make finding a hash function easier while still being memory-efficient
 
     std::default_random_engine rnd(DEFAULT_RANDOM_SEED);
     std::uniform_int_distribution<std::size_t> uniform_dist;
@@ -343,19 +347,20 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         }
 
         if (success) {
-            // Verify all positions are used (minimal property)
-            bool all_used = true;
+            // Count how many positions are used
+            std::size_t used_count = 0;
             for (std::size_t i = 0; i < table_size; ++i) {
-                if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
-                    all_used = false;
-                    break;
+                if (detail::uintptr(buckets[i]) != detail::uintptr_max) {
+                    used_count++;
                 }
             }
 
-            if (all_used) {
+            // Accept if we've placed all keys (allow up to 10% waste)
+            if (used_count == keys.size()) {
                 if constexpr (InitializeContext::template has_option<trace>) {
                     ctx.tr << "  Found minimal perfect hash after " << total_attempts
-                           << " attempts\n";
+                           << " attempts; " << used_count << "/" << table_size 
+                           << " slots used\n";
                 }
                 return;
             }

From d2885bad9428e1a901b7ddf91fa60dfea94632f9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:35:01 +0000
Subject: [PATCH 09/15] Fix table size calculation and documentation for 10%
 waste

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../boost/openmethod/policies/minimal_perfect_hash.hpp    | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index c32af666..78353522 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -49,7 +49,7 @@ namespace policies {
 //! determine values for `M` and `N` that result in a minimal perfect hash
 //! function for the set of registered type_ids. This means that the hash
 //! function is collision-free and the codomain is approximately the size of
-//! the domain, resulting in a dense range [0, n-1] for n inputs.
+//! the domain, resulting in a dense range [0, 1.1*n-1] for n inputs.
 //!
 //! Unlike @ref fast_perfect_hash, which uses a hash table of size 2^k
 //! (typically larger than needed) and may have unused slots, this policy
@@ -194,10 +194,8 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     }
 
     // Table size is N * 1.1 to allow up to 10% waste (makes finding hash easier)
-    table_size = N + N / 10;
-    if (table_size == N && N > 0) {
-        table_size = N + 1;  // Ensure at least 1 extra slot for N > 0
-    }
+    // Use (N * 11 + 9) / 10 to ensure proper rounding up for small N
+    table_size = (N * 11 + 9) / 10;
     
     if (table_size == 0) {
         shift = 0;

From 509fcaaf4b1c90ff27e47c56984c793a22d09956 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:36:04 +0000
Subject: [PATCH 10/15] Improve code readability with named constants for waste
 factor

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../boost/openmethod/policies/minimal_perfect_hash.hpp    | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 78353522..5f916a51 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -194,8 +194,11 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     }
 
     // Table size is N * 1.1 to allow up to 10% waste (makes finding hash easier)
-    // Use (N * 11 + 9) / 10 to ensure proper rounding up for small N
-    table_size = (N * 11 + 9) / 10;
+    // Formula: ceil(N * 1.1) = (N * 11 + 9) / 10 ensures proper rounding for all N
+    constexpr std::size_t WASTE_FACTOR_NUMERATOR = 11;    // 1.1 = 11/10
+    constexpr std::size_t WASTE_FACTOR_DENOMINATOR = 10;
+    constexpr std::size_t ROUNDING_ADJUSTMENT = 9;         // For ceiling division
+    table_size = (N * WASTE_FACTOR_NUMERATOR + ROUNDING_ADJUSTMENT) / WASTE_FACTOR_DENOMINATOR;
     
     if (table_size == 0) {
         shift = 0;
@@ -242,7 +245,6 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     constexpr std::size_t DEFAULT_GROUP_DIVISOR = 4;  // N/4 groups for balance between memory and speed
     constexpr std::size_t DISTRIBUTION_FACTOR = 2;     // 2*N range for better distribution
     constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
-    // Allow 10% waste to make finding a hash function easier while still being memory-efficient
 
     std::default_random_engine rnd(DEFAULT_RANDOM_SEED);
     std::uniform_int_distribution<std::size_t> uniform_dist;

From 39b8412b672ace07780c64d0e7f0bfe3c311283b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:41:45 +0000
Subject: [PATCH 11/15] Remove trailing spaces

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../openmethod/policies/minimal_perfect_hash.hpp | 16 ++++++++--------
 test/test_minimal_perfect_hash.cpp               |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 5f916a51..866f9e0e 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -99,7 +99,7 @@ struct minimal_perfect_hash : type_hash {
         //!
         //! Uses the PtHash algorithm to find:
         //! - Pilot hash parameters (M, N) for H(x) = (M * x) >> N
-        //! - Bucket assignment parameters 
+        //! - Bucket assignment parameters
         //! - Displacement values for each bucket to achieve minimal perfect hashing
         //!
         //! If no suitable values are found, calls the error handler with
@@ -199,7 +199,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     constexpr std::size_t WASTE_FACTOR_DENOMINATOR = 10;
     constexpr std::size_t ROUNDING_ADJUSTMENT = 9;         // For ceiling division
     table_size = (N * WASTE_FACTOR_NUMERATOR + ROUNDING_ADJUSTMENT) / WASTE_FACTOR_DENOMINATOR;
-    
+
     if (table_size == 0) {
         shift = 0;
         mult = 1;
@@ -209,7 +209,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         detail::minimal_perfect_hash_displacements<Registry>.clear();
         return;
     }
-    
+
     if (table_size == 1) {
         // Special case: only one type
         constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
@@ -254,7 +254,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     // Number of groups: typically sqrt(N) to N/4 for good performance
     num_groups = (std::max)(std::size_t(1), table_size / DEFAULT_GROUP_DIVISOR);
     if (num_groups > table_size) num_groups = table_size;
-    
+
     // Calculate bits needed for num_groups
     std::size_t GM = 0;
     std::size_t power = 1;
@@ -272,7 +272,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     for (std::size_t pass = 0; pass < MAX_PASSES && total_attempts < MAX_ATTEMPTS; ++pass) {
         mult = uniform_dist(rnd) | 1;
         group_mult = uniform_dist(rnd) | 1;
-        
+
         // Calculate M for pilot hash (number of bits for table_size range)
         std::size_t M = 0;
         power = 1;
@@ -298,7 +298,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         // Process groups in descending order of size (larger groups first)
         std::vector<std::size_t> group_order(num_groups);
         for (std::size_t i = 0; i < num_groups; ++i) group_order[i] = i;
-        std::sort(group_order.begin(), group_order.end(), 
+        std::sort(group_order.begin(), group_order.end(),
                   [&groups](std::size_t a, std::size_t b) {
                       return groups[a].size() > groups[b].size();
                   });
@@ -359,7 +359,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
             if (used_count == keys.size()) {
                 if constexpr (InitializeContext::template has_option<trace>) {
                     ctx.tr << "  Found minimal perfect hash after " << total_attempts
-                           << " attempts; " << used_count << "/" << table_size 
+                           << " attempts; " << used_count << "/" << table_size
                            << " slots used\n";
                 }
                 return;
@@ -396,7 +396,7 @@ void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type)
 
 template<class Registry, class Stream>
 auto minimal_perfect_hash::search_error::write(Stream& os) const -> void {
-    os << "could not find minimal perfect hash factors after " << attempts 
+    os << "could not find minimal perfect hash factors after " << attempts
        << " attempts using " << buckets << " buckets\n";
 }
 
diff --git a/test/test_minimal_perfect_hash.cpp b/test/test_minimal_perfect_hash.cpp
index 13c0b407..e8adb0d0 100644
--- a/test/test_minimal_perfect_hash.cpp
+++ b/test/test_minimal_perfect_hash.cpp
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(minimal_hash_properties) {
     BOOST_TEST(get_id(d3) == 3);
     BOOST_TEST(get_id(d4) == 4);
     BOOST_TEST(get_id(d5) == 5);
-    
+
     // Verify that the hash function produces a minimal perfect hash
     // (This is implicit - if it didn't, initialization would fail or we'd get wrong results)
 }

From 8e1603766e614c6de07e1cd2dfa0d87a550c9f4e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:50:20 +0000
Subject: [PATCH 12/15] Remove duplicate uintptr definitions (lines 23-33)

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../openmethod/policies/minimal_perfect_hash.hpp      | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 866f9e0e..ef0082cb 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -21,17 +21,6 @@ namespace boost::openmethod {
 
 namespace detail {
 
-#if defined(UINTPTR_MAX)
-using uintptr = std::uintptr_t;
-constexpr uintptr uintptr_max = UINTPTR_MAX;
-#else
-static_assert(
-    sizeof(std::size_t) == sizeof(void*),
-    "This implementation requires that size_t and void* have the same size.");
-using uintptr = std::size_t;
-constexpr uintptr uintptr_max = (std::numeric_limits<std::size_t>::max)();
-#endif
-
 template<class Registry>
 std::vector<type_id> minimal_perfect_hash_control;
 

From 2a3145718be4650aba656adabd6862dc0a5efc6b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:54:25 +0000
Subject: [PATCH 13/15] Move uintptr definitions to separate namespace
 declaration

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../openmethod/policies/minimal_perfect_hash.hpp  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index ef0082cb..a32b60c5 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -17,6 +17,21 @@
 #pragma warning(disable : 4702) // unreachable code
 #endif
 
+namespace boost::openmethod::detail {
+
+#if defined(UINTPTR_MAX)
+using uintptr = std::uintptr_t;
+constexpr uintptr uintptr_max = UINTPTR_MAX;
+#else
+static_assert(
+    sizeof(std::size_t) == sizeof(void*),
+    "This implementation requires that size_t and void* have the same size.");
+using uintptr = std::size_t;
+constexpr uintptr uintptr_max = (std::numeric_limits<std::size_t>::max)();
+#endif
+
+} // namespace boost::openmethod::detail
+
 namespace boost::openmethod {
 
 namespace detail {

From 508687c5d765a2643f1ff84fb3616d0beb4305c1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 05:57:56 +0000
Subject: [PATCH 14/15] Use anonymous namespace for uintptr definitions

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../openmethod/policies/minimal_perfect_hash.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index a32b60c5..6f492e07 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -17,7 +17,7 @@
 #pragma warning(disable : 4702) // unreachable code
 #endif
 
-namespace boost::openmethod::detail {
+namespace {
 
 #if defined(UINTPTR_MAX)
 using uintptr = std::uintptr_t;
@@ -30,7 +30,7 @@ using uintptr = std::size_t;
 constexpr uintptr uintptr_max = (std::numeric_limits<std::size_t>::max)();
 #endif
 
-} // namespace boost::openmethod::detail
+} // anonymous namespace
 
 namespace boost::openmethod {
 
@@ -142,8 +142,8 @@ struct minimal_perfect_hash : type_hash {
         //! @return The hash value
         BOOST_FORCEINLINE
         static auto hash(type_id type) -> std::size_t {
-            auto pilot = (mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
-            auto group = (group_mult * reinterpret_cast<detail::uintptr>(type)) >> group_shift;
+            auto pilot = (mult * reinterpret_cast<uintptr>(type)) >> shift;
+            auto group = (group_mult * reinterpret_cast<uintptr>(type)) >> group_shift;
             auto index = (pilot + detail::minimal_perfect_hash_displacements<Registry>[group]) % table_size;
 
             if constexpr (Registry::has_runtime_checks) {
@@ -289,13 +289,13 @@ void minimal_perfect_hash::fn<Registry>::initialize(
         // Partition keys into groups
         std::vector<std::vector<type_id>> groups(num_groups);
         for (auto key : keys) {
-            auto group_idx = ((group_mult * reinterpret_cast<detail::uintptr>(key)) >> group_shift) % num_groups;
+            auto group_idx = ((group_mult * reinterpret_cast<uintptr>(key)) >> group_shift) % num_groups;
             groups[group_idx].push_back(key);
         }
 
         // Try to find displacements for each group
         detail::minimal_perfect_hash_displacements<Registry>.assign(num_groups, 0);
-        buckets.assign(table_size, type_id(detail::uintptr_max));
+        buckets.assign(table_size, type_id(uintptr_max));
         std::vector<bool> used(table_size, false);
         bool success = true;
 
@@ -324,7 +324,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
                 positions.reserve(groups[g].size());
                 bool valid = true;
                 for (auto key : groups[g]) {
-                    auto pilot = (mult * reinterpret_cast<detail::uintptr>(key)) >> shift;
+                    auto pilot = (mult * reinterpret_cast<uintptr>(key)) >> shift;
                     auto pos = (pilot + disp) % table_size;
                     if (used[pos]) {
                         valid = false;
@@ -354,7 +354,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
             // Count how many positions are used
             std::size_t used_count = 0;
             for (std::size_t i = 0; i < table_size; ++i) {
-                if (detail::uintptr(buckets[i]) != detail::uintptr_max) {
+                if (uintptr(buckets[i]) != uintptr_max) {
                     used_count++;
                 }
             }

From 22673b0f45e99ddced51ad91c58b6ab23ccb264a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 18 Dec 2025 06:05:28 +0000
Subject: [PATCH 15/15] Fix potential overflow in group hash calculation by
 using uint32_t multiplier

Co-authored-by: jll63 <5083077+jll63@users.noreply.github.com>
---
 .../boost/openmethod/policies/minimal_perfect_hash.hpp   | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/boost/openmethod/policies/minimal_perfect_hash.hpp b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
index 6f492e07..c07e53aa 100644
--- a/include/boost/openmethod/policies/minimal_perfect_hash.hpp
+++ b/include/boost/openmethod/policies/minimal_perfect_hash.hpp
@@ -88,7 +88,7 @@ struct minimal_perfect_hash : type_hash {
         static std::size_t shift;
         static std::size_t table_size;  // N for minimal perfect hash
         static std::size_t num_groups;
-        static std::size_t group_mult;
+        static std::uint32_t group_mult;  // Smaller type to avoid overflow
         static std::size_t group_shift;
 
         static void check(std::size_t index, type_id type);
@@ -179,7 +179,7 @@ template<class Registry>
 std::size_t minimal_perfect_hash::fn<Registry>::num_groups;
 
 template<class Registry>
-std::size_t minimal_perfect_hash::fn<Registry>::group_mult;
+std::uint32_t minimal_perfect_hash::fn<Registry>::group_mult;
 
 template<class Registry>
 std::size_t minimal_perfect_hash::fn<Registry>::group_shift;
@@ -275,7 +275,10 @@ void minimal_perfect_hash::fn<Registry>::initialize(
     // Try different pilot hash parameters
     for (std::size_t pass = 0; pass < MAX_PASSES && total_attempts < MAX_ATTEMPTS; ++pass) {
         mult = uniform_dist(rnd) | 1;
-        group_mult = uniform_dist(rnd) | 1;
+        // Use a smaller (32-bit) multiplier for the group hash to avoid overflow.
+        // We only need enough bits to distinguish between the num_groups groups.
+        std::uniform_int_distribution<std::uint32_t> group_dist;
+        group_mult = group_dist(rnd) | 1;
 
         // Calculate M for pilot hash (number of bits for table_size range)
         std::size_t M = 0;