NVIDIA · akifcorduk · Oct 22, 2025 · Oct 23, 2025 · Oct 24, 2025 · Oct 24, 2025
diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp
@@ -383,8 +383,6 @@ int main(int argc, char* argv[])
   double memory_limit    = program.get<double>("--memory-limit");
   bool track_allocations = program.get<std::string>("--track-allocations")[0] == 't';
 
-  if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; }
-
   if (program.is_used("--out-dir")) {
     out_dir     = program.get<std::string>("--out-dir");
     result_file = out_dir + "/final_result.csv";
@@ -421,6 +419,7 @@ int main(int argc, char* argv[])
         paths.push_back(entry.path());
       }
     }
+    if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; }
     // if batch_num is given, trim the paths to only concerned batch
     if (batch_num != -1) {
       if (n_batches <= 0) {
@@ -487,6 +486,7 @@ int main(int argc, char* argv[])
     }
     merge_result_files(out_dir, result_file, n_gpus, batch_num);
   } else {
+    if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads(); }
     auto memory_resource = make_async();
     if (memory_limit > 0) {
       auto limiting_adaptor =

@@ -75,6 +75,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
+
 # To use sanitizer with cuda runtime, one must follow a few steps:
 # 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0'
 # 2. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0
@@ -178,7 +179,6 @@ if(DEFINE_ASSERT)
   add_definitions(-UNDEBUG)
 endif()
 
-
 # ##################################################################################################
 # - find CPM based dependencies  ------------------------------------------------------------------
 rapids_cpm_init()

@@ -495,8 +495,7 @@ void diversity_manager_t<i_t, f_t>::diversity_step(i_t max_iterations_without_im
     improved = false;
     while (k-- > 0) {
       if (check_b_b_preemption()) { return; }
-      auto new_sol_vector = population.get_external_solutions();
-      recombine_and_ls_with_all(new_sol_vector);
+      population.add_external_solutions_to_population();
       population.adjust_weights_according_to_best_feasible();
       cuopt_assert(population.test_invariant(), "");
       if (population.current_size() < 2) {
@@ -649,7 +648,15 @@ diversity_manager_t<i_t, f_t>::recombine_and_local_search(solution_t<i_t, f_t>&
   ls_config_t<i_t, f_t> ls_config;
   ls_config.best_objective_of_parents    = best_objective_of_parents;
   ls_config.at_least_one_parent_feasible = at_least_one_parent_feasible;
-  success = this->run_local_search(offspring, population.weights, timer, ls_config);
+  offspring.swap_problem_pointers();
+  population.weights_with_cuts.cstr_weights.resize(offspring.problem_ptr->n_constraints,
+                                                   offspring.handle_ptr->get_stream());
+  raft::copy(population.weights_with_cuts.cstr_weights.data(),
+             population.weights.cstr_weights.data(),
+             population.weights.cstr_weights.size(),
+             offspring.handle_ptr->get_stream());
+  success = this->run_local_search(offspring, population.weights_with_cuts, timer, ls_config);
+  offspring.swap_problem_pointers();
   if (!success) {
     // add the attempt
     mab_recombiner.add_mab_reward(mab_recombiner.last_chosen_option,

@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -24,7 +24,6 @@ constexpr double weight_decrease_ratio       = 0.9;
 constexpr double max_infeasibility_weight    = 1e12;
 constexpr double min_infeasibility_weight    = 1.;
 constexpr double infeasibility_balance_ratio = 1.1;
-constexpr double halving_skip_ratio          = 0.75;
 
 template <typename i_t, typename f_t>
 population_t<i_t, f_t>::population_t(std::string const& name_,
@@ -41,8 +40,8 @@ population_t<i_t, f_t>::population_t(std::string const& name_,
     max_solutions(max_solutions_),
     infeasibility_importance(infeasibility_weight_),
     weights(0, context.problem_ptr->handle_ptr),
+    weights_with_cuts(0, context.problem_ptr->handle_ptr),
     rng(cuopt::seed_generator::get_seed()),
-    early_exit_primal_generation(false),
     population_hash_map(*problem_ptr),
     timer(0)
 {
@@ -64,6 +63,17 @@ i_t get_max_var_threshold(i_t n_vars)
   return n_vars - 10;
 }
 
+/**
+ * @brief Copies problem_ptr_with_cuts into every solution slot referenced by `indices`.
+ */
+template <typename i_t, typename f_t>
+void population_t<i_t, f_t>::apply_problem_ptr_to_all_solutions()
+{
+  for (const auto& index_entry : indices) {
+    solutions[index_entry.first].second.problem_with_cuts_ptr = problem_ptr_with_cuts;
+  }
+}
+
 template <typename i_t, typename f_t>
 void population_t<i_t, f_t>::allocate_solutions()
 {
@@ -73,6 +80,31 @@ void population_t<i_t, f_t>::allocate_solutions()
   }
 }
 
+/**
+ * @brief Stores problem_ptr_with_cuts and resizes weights_with_cuts to its n_constraints.
+ *
+ * Entries at indices >= problem_ptr->n_constraints are set to the default weight (10).
+ *
+ * @param problem_ptr_with_cuts Problem with cuts; it is dereferenced here, so it must be non-null.
+ */
+template <typename i_t, typename f_t>
+void population_t<i_t, f_t>::set_problem_ptr_with_cuts(problem_t<i_t, f_t>* problem_ptr_with_cuts)
+{
+  constexpr f_t ten           = 10.;
+  this->problem_ptr_with_cuts = problem_ptr_with_cuts;
+  auto& cut_weights           = weights_with_cuts.cstr_weights;
+  cut_weights.resize(problem_ptr_with_cuts->n_constraints,
+                     problem_ptr_with_cuts->handle_ptr->get_stream());
+  // Only the tail past the original constraints needs the default. Skip the fill when there is
+  // no tail, so the start iterator is never advanced beyond end().
+  const auto n_original = static_cast<size_t>(problem_ptr->n_constraints);
+  if (n_original >= cut_weights.size()) { return; }
+  thrust::uninitialized_fill(problem_ptr_with_cuts->handle_ptr->get_thrust_policy(),
+                             cut_weights.begin() + n_original,
+                             cut_weights.end(),
+                             ten);
+}
+
 template <typename i_t, typename f_t>
 void population_t<i_t, f_t>::initialize_population()
 {
@@ -87,6 +108,12 @@ void population_t<i_t, f_t>::initialize_population()
                              weights.cstr_weights.begin(),
                              weights.cstr_weights.end(),
                              ten);
+  weights_with_cuts.cstr_weights.resize(problem_ptr->n_constraints,
+                                        problem_ptr->handle_ptr->get_stream());
+  thrust::uninitialized_fill(problem_ptr->handle_ptr->get_thrust_policy(),
+                             weights_with_cuts.cstr_weights.begin(),
+                             weights_with_cuts.cstr_weights.end(),
+                             ten);
 }
 
 template <typename i_t, typename f_t>
@@ -109,12 +136,11 @@ std::pair<solution_t<i_t, f_t>, solution_t<i_t, f_t>> population_t<i_t, f_t>::ge
   auto second_solution = solutions[indices[j].first].second;
   // if best feasible and best are the same, take the second index instead of best
   if (i == 0 && j == 1) {
-    bool same =
-      check_integer_equal_on_indices(first_solution.problem_ptr->integer_indices,
-                                     first_solution.assignment,
-                                     second_solution.assignment,
-                                     first_solution.problem_ptr->tolerances.integrality_tolerance,
-                                     first_solution.handle_ptr);
+    bool same = check_integer_equal_on_indices(problem_ptr->integer_indices,
+                                               first_solution.assignment,
+                                               second_solution.assignment,
+                                               problem_ptr->tolerances.integrality_tolerance,
+                                               first_solution.handle_ptr);
     if (same) {
       auto new_sol    = solutions[indices[2].first].second;
       second_solution = std::move(new_sol);
@@ -172,7 +198,6 @@ void population_t<i_t, f_t>::add_external_solution(const std::vector<f_t>& solut
     CUOPT_LOG_DEBUG("Found new best solution %g in external queue",
                     problem_ptr->get_user_obj_from_solver_obj(objective));
   }
-  if (external_solution_queue.size() >= 5) { early_exit_primal_generation = true; }
   solutions_in_external_queue_ = true;
 }
 
@@ -184,6 +209,7 @@ void population_t<i_t, f_t>::add_external_solutions_to_population()
 
   auto new_sol_vector = get_external_solutions();
   add_solutions_from_vec(std::move(new_sol_vector));
+  apply_problem_ptr_to_all_solutions();
 }
 
 // normally we would need a lock here but these are boolean types and race conditions are not
@@ -192,7 +218,6 @@ template <typename i_t, typename f_t>
 void population_t<i_t, f_t>::preempt_heuristic_solver()
 {
   context.preempt_heuristic_solver_ = true;
-  early_exit_primal_generation      = true;
 }
 
 template <typename i_t, typename f_t>
@@ -668,42 +693,6 @@ std::vector<solution_t<i_t, f_t>> population_t<i_t, f_t>::population_to_vector()
   return sol_vec;
 }
 
-template <typename i_t, typename f_t>
-void population_t<i_t, f_t>::halve_the_population()
-{
-  raft::common::nvtx::range fun_scope("halve_the_population");
-  // try 3/4 here
-  if (current_size() <= (max_solutions * halving_skip_ratio)) { return; }
-  CUOPT_LOG_DEBUG("Halving the population, current size: %lu", current_size());
-  // put population into a vector
-  auto sol_vec                  = population_to_vector();
-  i_t counter                   = 0;
-  constexpr i_t max_adjustments = 4;
-  size_t max_var_threshold      = get_max_var_threshold(problem_ptr->n_integer_vars);
-
-  std::lock_guard<std::recursive_mutex> lock(write_mutex);
-  while (current_size() > max_solutions / 2) {
-    clear_except_best_feasible();
-    var_threshold = std::max(var_threshold * 0.97, 0.5 * problem_ptr->n_integer_vars);
-    for (auto& sol : sol_vec) {
-      add_solution(solution_t<i_t, f_t>(sol));
-    }
-    if (counter++ > max_adjustments) break;
-  }
-  counter = 0;
-  // if we removed too many decrease the diversity a little
-  while (current_size() < max_solutions / 4) {
-    clear_except_best_feasible();
-    var_threshold = std::min(
-      max_var_threshold,
-      std::min((size_t)(var_threshold * 1.02), (size_t)(0.995 * problem_ptr->n_integer_vars)));
-    for (auto& sol : sol_vec) {
-      add_solution(solution_t<i_t, f_t>(sol));
-    }
-    if (counter++ > max_adjustments) break;
-  }
-}
-
 template <typename i_t, typename f_t>
 size_t population_t<i_t, f_t>::find_free_solution_index()
 {

@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -151,7 +151,6 @@ class population_t {
   void find_diversity(std::vector<solution_t<i_t, f_t>>& initial_sol_vector, bool avg);
 
   std::vector<solution_t<i_t, f_t>> population_to_vector();
-  void halve_the_population();
 
   void run_solution_callbacks(solution_t<i_t, f_t>& sol);
 
@@ -161,6 +160,9 @@ class population_t {
 
   void diversity_step(i_t max_iterations_without_improvement);
 
+  void set_problem_ptr_with_cuts(problem_t<i_t, f_t>* problem_ptr_with_cuts);
+  void apply_problem_ptr_to_all_solutions();
+
   // does some consistency tests
   bool test_invariant();
 
@@ -169,6 +171,7 @@ class population_t {
   std::string name;
   mip_solver_context_t<i_t, f_t>& context;
   problem_t<i_t, f_t>* problem_ptr;
+  problem_t<i_t, f_t>* problem_ptr_with_cuts = nullptr;  // null until set_problem_ptr_with_cuts()
   diversity_manager_t<i_t, f_t>& dm;
   i_t var_threshold;
   i_t initial_threshold;
@@ -178,6 +181,7 @@ class population_t {
   f_t infeasibility_importance = 100.;
   size_t max_solutions;
   weight_t<i_t, f_t> weights;
+  weight_t<i_t, f_t> weights_with_cuts;
   std::vector<std::pair<size_t, f_t>> indices;
   std::vector<std::pair<bool, solution_t<i_t, f_t>>> solutions;
 
@@ -202,7 +206,7 @@ class population_t {
   i_t update_iter = 0;
   std::recursive_mutex write_mutex;
   std::mutex solution_mutex;
-  std::atomic<bool> early_exit_primal_generation = false;
+  std::atomic<bool> preempt_heuristic_solver_    = false;
   std::atomic<bool> solutions_in_external_queue_ = false;
   f_t best_feasible_objective                    = std::numeric_limits<f_t>::max();
   assignment_hash_map_t<i_t, f_t> population_hash_map;

@@ -140,6 +140,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
     auto& other_solution   = a.get_feasible() ? b : a;
     // copy the solution from guiding
     solution_t<i_t, f_t> offspring(guiding_solution);
+    offspring.swap_problem_pointers();
     // find same values and populate it to offspring
     i_t n_different_vars = this->assign_same_integer_values(a, b, offspring);
     CUOPT_LOG_DEBUG("BP rec: Number of different variables %d MAX_VARS %d",
@@ -181,8 +182,9 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
       rmm::device_uvector<f_t> old_assignment(offspring.assignment,
                                               offspring.handle_ptr->get_stream());
       offspring.handle_ptr->sync_stream();
-      offspring.assignment  = std::move(fixed_assignment);
-      offspring.problem_ptr = &fixed_problem;
+      offspring.assignment                  = std::move(fixed_assignment);
+      problem_t<i_t, f_t>* orig_problem_ptr = offspring.problem_ptr;
+      offspring.problem_ptr                 = &fixed_problem;
       cuopt_func_call(offspring.test_variable_bounds(false));
       get_probing_values_for_feasible(guiding_solution,
                                       other_solution,
@@ -199,7 +201,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
       constraint_prop.single_rounding_only = false;
       cuopt_func_call(bool feasible_after_bounds_prop = offspring.get_feasible());
       offspring.handle_ptr->sync_stream();
-      offspring.problem_ptr = a.problem_ptr;
+      offspring.problem_ptr = orig_problem_ptr;
       fixed_assignment      = std::move(offspring.assignment);
       offspring.assignment  = std::move(old_assignment);
       offspring.handle_ptr->sync_stream();
@@ -219,6 +221,7 @@ class bound_prop_recombiner_t : public recombiner_t<i_t, f_t> {
     }
     constraint_prop.max_n_failed_repair_iterations = 1;
     cuopt_func_call(offspring.test_number_all_integer());
+    offspring.swap_problem_pointers();
     bool better_cost_than_parents =
       offspring.get_quality(weights) <
       std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights));

@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -44,6 +44,7 @@ class fp_recombiner_t : public recombiner_t<i_t, f_t> {
     auto& other_solution   = a.get_feasible() ? b : a;
     // copy the solution from A
     solution_t<i_t, f_t> offspring(guiding_solution);
+    offspring.swap_problem_pointers();
     // find same values and populate it to offspring
     i_t n_different_vars =
       this->assign_same_integer_values(guiding_solution, other_solution, offspring);
@@ -126,6 +127,7 @@ class fp_recombiner_t : public recombiner_t<i_t, f_t> {
         fp_recombiner_config_t::decrease_max_n_of_vars_from_other();
       }
     }
+    offspring.swap_problem_pointers();
     bool better_cost_than_parents =
       offspring.get_quality(weights) <
       std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights));

@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -75,6 +75,7 @@ class line_segment_recombiner_t : public recombiner_t<i_t, f_t> {
     auto& other_solution   = a.get_feasible() ? b : a;
     // copy the solution from A
     solution_t<i_t, f_t> offspring(guiding_solution);
+    offspring.swap_problem_pointers();
     timer_t line_segment_timer{ls_recombiner_config_t::time_limit};
     // TODO after we have the conic combination, detect the lambda change
     // (i.e. the integral variables flip on line segment)
@@ -99,6 +100,7 @@ class line_segment_recombiner_t : public recombiner_t<i_t, f_t> {
                                             is_feasibility_run,
                                             line_segment_timer);
     line_segment_search.settings = {};
+    offspring.swap_problem_pointers();
     bool better_cost_than_parents =
       offspring.get_quality(weights) <
       std::min(other_solution.get_quality(weights), guiding_solution.get_quality(weights));