diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index e8a9b5dce..af1415fa9 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -31,6 +31,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pinned_host_allocator.cu + ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) # Uncomment to enable debug info diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index db24f55a2..f5cd54053 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -613,6 +613,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, @@ -658,7 +660,15 @@ i_t basis_repair(const csc_matrix_t& A, nonbasic_list[nonbasic_map[replace_j]] = bad_j; vstatus[replace_j] = variable_status_t::BASIC; // This is the main issue. What value should bad_j take on. - vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + if (lower[bad_j] == -inf && upper[bad_j] == inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + } else if (lower[bad_j] > -inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_LOWER; + } else if (upper[bad_j] < inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_UPPER; + } else { + assert(1 == 0); + } } return 0; @@ -849,6 +859,8 @@ template int factorize_basis(const csc_matrix_t& A, template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index b668c0f46..295bedccd 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -42,6 +42,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 6b79f3c86..2c781a515 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -2046,6 +2046,8 @@ template int basis_update_mpf_t::refactor_basis( const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus) @@ -2066,7 +2068,8 @@ int basis_update_mpf_t::refactor_basis( deficient, slacks_needed) == -1) { settings.log.debug("Initial factorization failed\n"); - basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair( + A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); #ifdef CHECK_BASIS_REPAIR const i_t m = A.m; diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index cea907074..038db59b9 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -223,6 +223,9 @@ class basis_update_mpf_t { reset_stats(); } + basis_update_mpf_t(const basis_update_mpf_t& other) = default; + basis_update_mpf_t& operator=(const basis_update_mpf_t& other) = default; + void print_stats() const { i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; @@ -373,6 +376,8 @@ class basis_update_mpf_t { // Compute L*U = A(p, basic_list) int refactor_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus); diff --git a/cpp/src/dual_simplex/bnb_worker.hpp b/cpp/src/dual_simplex/bnb_worker.hpp new file mode 100644 index 000000000..e77b64639 --- /dev/null +++ b/cpp/src/dual_simplex/bnb_worker.hpp @@ -0,0 +1,280 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +constexpr int bnb_num_worker_types = 5; + +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). +// +// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, +// Berlin, 2007. doi: 10.14279/depositonce-1634. +enum bnb_worker_type_t : int { + EXPLORATION = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) +}; + +template +struct bnb_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + omp_atomic_t nodes_since_last_log = 0; + omp_atomic_t last_log = 0.0; +}; + +template +class bnb_worker_data_t { + public: + const i_t worker_id; + omp_atomic_t worker_type; + omp_atomic_t is_active; + omp_atomic_t lower_bound; + + lp_problem_t leaf_problem; + lp_solution_t leaf_solution; + std::vector leaf_edge_norms; + + basis_update_mpf_t basis_factors; + std::vector basic_list; + std::vector nonbasic_list; + + bounds_strengthening_t node_presolver; + std::vector bounds_changed; + + std::vector start_lower; + std::vector start_upper; + mip_node_t* start_node; + + bool recompute_basis = true; + bool recompute_bounds = true; + + bnb_worker_data_t(i_t worker_id, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + : worker_id(worker_id), + worker_type(EXPLORATION), + is_active(false), + lower_bound(-std::numeric_limits::infinity()), + leaf_problem(original_lp), + leaf_solution(original_lp.num_rows, original_lp.num_cols), + basis_factors(original_lp.num_rows, settings.refactor_frequency), + basic_list(original_lp.num_rows), + nonbasic_list(), + node_presolver(leaf_problem, Arow, {}, var_type), + bounds_changed(original_lp.num_cols, false) + { + } + + // Set the `start_node` for best-first search. + void init_best_first(mip_node_t* node, const lp_problem_t& original_lp) + { + start_node = node; + start_lower = original_lp.lower; + start_upper = original_lp.upper; + worker_type = EXPLORATION; + lower_bound = node->lower_bound; + is_active = true; + } + + // Initialize the worker for diving, setting the `start_node`, `start_lower` and + // `start_upper`. Returns `true` if the starting node is feasible via + // bounds propagation. + bool init_diving(mip_node_t* node, + bnb_worker_type_t type, + const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings) + { + internal_node = node->detach_copy(); + start_node = &internal_node; + + start_lower = original_lp.lower; + start_upper = original_lp.upper; + worker_type = type; + lower_bound = node->lower_bound; + is_active = true; + + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + node->get_variable_bounds(start_lower, start_upper, bounds_changed); + + return node_presolver.bounds_strengthening(start_lower, start_upper, bounds_changed, settings); + } + + // Set the variables bounds for the LP relaxation of the current node. + bool set_lp_variable_bounds_for(mip_node_t* node_ptr, + const simplex_solver_settings_t& settings) + { + // Reset the bound_changed markers + std::fill(bounds_changed.begin(), bounds_changed.end(), false); + + // Set the correct bounds for the leaf problem + if (recompute_bounds) { + leaf_problem.lower = start_lower; + leaf_problem.upper = start_upper; + node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); + + } else { + node_ptr->update_branched_variable_bounds( + leaf_problem.lower, leaf_problem.upper, bounds_changed); + } + + return node_presolver.bounds_strengthening( + leaf_problem.lower, leaf_problem.upper, bounds_changed, settings); + } + + private: + // For diving, we need to store the full node instead of + // of just a pointer, since it is not store in the tree anymore. + // To keep the same interface across all worker types, + // this will be used as a temporary storage and + // will be pointed by `start_node`. + // For exploration, this will not be used. + mip_node_t internal_node; +}; + +template +class bnb_worker_pool_t { + public: + void init(i_t num_workers, + const lp_problem_t& original_lp, + const csr_matrix_t& Arow, + const std::vector& var_type, + const simplex_solver_settings_t& settings) + { + workers_.resize(num_workers); + num_idle_workers_ = num_workers; + for (i_t i = 0; i < num_workers; ++i) { + workers_[i] = + std::make_unique>(i, original_lp, Arow, var_type, settings); + idle_workers_.push_front(i); + } + } + + bnb_worker_data_t* get_idle_worker() + { + std::lock_guard lock(mutex_); + + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + return workers_[idx].get(); + } + } + + void pop_idle_worker() + { + std::lock_guard lock(mutex_); + if (!idle_workers_.empty()) { + idle_workers_.pop_front(); + num_idle_workers_--; + } + } + + bnb_worker_data_t* get_and_pop_idle_worker() + { + std::lock_guard lock(mutex_); + + if (idle_workers_.empty()) { + return nullptr; + } else { + i_t idx = idle_workers_.front(); + idle_workers_.pop_front(); + num_idle_workers_--; + return workers_[idx].get(); + } + } + + void return_worker_to_pool(bnb_worker_data_t* worker) + { + worker->is_active = false; + std::lock_guard lock(mutex_); + idle_workers_.push_back(worker->worker_id); + num_idle_workers_++; + } + + f_t get_lower_bounds() + { + f_t lower_bound = std::numeric_limits::infinity(); + + for (i_t i = 0; i < workers_.size(); ++i) { + if (workers_[i]->worker_type == EXPLORATION && workers_[i]->is_active) { + lower_bound = std::min(workers_[i]->lower_bound.load(), lower_bound); + } + } + + return lower_bound; + } + + i_t num_idle_workers() { return num_idle_workers_; } + + private: + // Worker pool + std::vector>> workers_; + + omp_mutex_t mutex_; + std::deque idle_workers_; + omp_atomic_t num_idle_workers_; +}; + +template +std::vector bnb_get_worker_types(diving_heuristics_settings_t settings) +{ + std::vector types; + types.reserve(bnb_num_worker_types); + types.push_back(EXPLORATION); + if (!settings.disable_pseudocost_diving) { types.push_back(PSEUDOCOST_DIVING); } + if (!settings.disable_line_search_diving) { types.push_back(LINE_SEARCH_DIVING); } + if (!settings.disable_guided_diving) { types.push_back(GUIDED_DIVING); } + if (!settings.disable_coefficient_diving) { types.push_back(COEFFICIENT_DIVING); } + return types; +} + +template +std::array bnb_get_num_workers_round_robin( + i_t num_threads, diving_heuristics_settings_t settings) +{ + std::array max_num_workers; + auto worker_types = bnb_get_worker_types(settings); + + max_num_workers.fill(0); + max_num_workers[EXPLORATION] = std::max(1, num_threads / 2); + + i_t diving_workers = 2 * settings.num_diving_workers; + i_t m = worker_types.size() - 1; + + for (size_t i = 1, k = 0; i < worker_types.size(); ++i) { + i_t start = (double)k * diving_workers / m; + i_t end = (double)(k + 1) * diving_workers / m; + max_num_workers[worker_types[i]] = end - start; + ++k; + } + + return max_num_workers; +} + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp index f1bf52c1e..712f25837 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.cpp +++ b/cpp/src/dual_simplex/bounds_strengthening.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -59,8 +59,7 @@ bounds_strengthening_t::bounds_strengthening_t( const csr_matrix_t& Arow, const std::vector& row_sense, const std::vector& var_types) - : bounds_changed(problem.num_cols, false), - A(problem.A), + : A(problem.A), Arow(Arow), var_types(var_types), delta_min_activity(problem.num_rows), @@ -93,6 +92,7 @@ template bool bounds_strengthening_t::bounds_strengthening( std::vector& lower_bounds, std::vector& upper_bounds, + const std::vector& bounds_changed, const simplex_solver_settings_t& settings) { const i_t m = A.m; @@ -154,7 +154,7 @@ bool bounds_strengthening_t::bounds_strengthening( bool is_infeasible = check_infeasibility(min_a, max_a, cnst_lb, cnst_ub, settings.primal_tol); if (is_infeasible) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible constraint %d, cnst_lb %e, cnst_ub %e, min_a %e, max_a %e\n", iter, i, @@ -211,7 +211,7 @@ bool bounds_strengthening_t::bounds_strengthening( new_ub = std::min(new_ub, upper_bounds[k]); if (new_lb > new_ub + 1e-6) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); return false; } diff --git a/cpp/src/dual_simplex/bounds_strengthening.hpp b/cpp/src/dual_simplex/bounds_strengthening.hpp index e7e218b82..6a43ef44c 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.hpp +++ b/cpp/src/dual_simplex/bounds_strengthening.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -22,10 +22,9 @@ class bounds_strengthening_t { bool bounds_strengthening(std::vector& lower_bounds, std::vector& upper_bounds, + const std::vector& bounds_changed, const simplex_solver_settings_t& settings); - std::vector bounds_changed; - private: const csc_matrix_t& A; const csr_matrix_t& Arow; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 6161f4d3f..d6a95e82e 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1,14 +1,13 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ -#include -#include -#include #include + +#include #include #include #include @@ -20,6 +19,9 @@ #include #include +#include + +#include #include #include #include @@ -183,30 +185,27 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) { const f_t user_mip_gap = relative_gap(obj_value, lower_bound); if (user_mip_gap == std::numeric_limits::infinity()) { - return " - "; + return " - "; } else { constexpr int BUFFER_LEN = 32; char buffer[BUFFER_LEN]; - snprintf(buffer, BUFFER_LEN - 1, "%4.1f%%", user_mip_gap * 100); + snprintf(buffer, BUFFER_LEN - 1, "%5.1f%%", user_mip_gap * 100); return std::string(buffer); } } -inline const char* feasible_solution_symbol(thread_type_t type) +inline const char* feasible_solution_symbol(bnb_worker_type_t type) { switch (type) { - case thread_type_t::EXPLORATION: return "B"; - case thread_type_t::DIVING: return "D"; - default: return "U"; + case bnb_worker_type_t::EXPLORATION: return "B "; + case bnb_worker_type_t::COEFFICIENT_DIVING: return "D "; + case bnb_worker_type_t::LINE_SEARCH_DIVING: return "D "; + case bnb_worker_type_t::PSEUDOCOST_DIVING: return "D "; + case bnb_worker_type_t::GUIDED_DIVING: return "D "; + default: return "U "; } } -inline bool has_children(node_solve_info_t status) -{ - return status == node_solve_info_t::UP_CHILD_FIRST || - status == node_solve_info_t::DOWN_CHILD_FIRST; -} - } // namespace template @@ -216,53 +215,74 @@ branch_and_bound_t::branch_and_bound_t( : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), + Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_exploration_status_t::UNSET) + solver_status_(mip_status_t::UNSET) { exploration_stats_.start_time = tic(); dualize_info_t dualize_info; convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); - mutex_upper_.lock(); upper_bound_ = inf; - mutex_upper_.unlock(); } template -f_t branch_and_bound_t::get_upper_bound() +f_t branch_and_bound_t::get_lower_bound() { - mutex_upper_.lock(); - const f_t upper_bound = upper_bound_; - mutex_upper_.unlock(); - return upper_bound; + f_t lower_bound = lower_bound_ceiling_.load(); + f_t heap_lower_bound = node_queue_.get_lower_bound(); + lower_bound = std::min(heap_lower_bound, lower_bound); + lower_bound = std::min(worker_pool_.get_lower_bounds(), lower_bound); + return std::isfinite(lower_bound) ? lower_bound : -inf; } template -f_t branch_and_bound_t::get_lower_bound() +void branch_and_bound_t::report_heuristic(f_t obj) { - f_t lower_bound = lower_bound_ceiling_.load(); - mutex_heap_.lock(); - if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } - mutex_heap_.unlock(); + if (is_running) { + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string user_gap = user_mip_gap(user_obj, user_lower); - for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { - lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); + settings_.log.printf( + "H %+13.6e %+10.6e %s %9.2f\n", + user_obj, + user_lower, + user_gap.c_str(), + toc(exploration_stats_.start_time)); + } else { + settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", + compute_user_objective(original_lp_, obj), + toc(exploration_stats_.start_time)); } - - return lower_bound; } template -i_t branch_and_bound_t::get_heap_size() +void branch_and_bound_t::report(std::string symbol, + f_t obj, + f_t lower_bound, + i_t node_depth) { - mutex_heap_.lock(); - i_t size = heap_.size(); - mutex_heap_.unlock(); - return size; + i_t nodes_explored = exploration_stats_.nodes_explored; + i_t nodes_unexplored = exploration_stats_.nodes_unexplored; + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, lower_bound); + f_t iter_node = exploration_stats_.total_lp_iters / nodes_explored; + std::string user_gap = user_mip_gap(user_obj, user_lower); + settings_.log.printf("%s%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + symbol.c_str(), + nodes_explored, + nodes_unexplored, + user_obj, + user_lower, + node_depth, + iter_node, + user_gap.c_str(), + toc(exploration_stats_.start_time)); } template @@ -303,25 +323,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu } mutex_upper_.unlock(); - if (is_feasible) { - if (solver_status_ == mip_exploration_status_t::RUNNING) { - f_t user_obj = compute_user_objective(original_lp_, obj); - f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string gap = user_mip_gap(user_obj, user_lower); - - settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - user_obj, - user_lower, - gap.c_str(), - toc(exploration_stats_.start_time)); - } else { - settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", - compute_user_objective(original_lp_, obj), - toc(exploration_stats_.start_time)); - } - } - + if (is_feasible) { report_heuristic(obj); } if (attempt_repair) { mutex_repair_.lock(); repair_queue_.push_back(crushed_solution); @@ -417,17 +419,7 @@ void branch_and_bound_t::repair_heuristic_solutions() if (repaired_obj < upper_bound_) { upper_bound_ = repaired_obj; incumbent_.set_incumbent_solution(repaired_obj, repaired_solution); - - f_t obj = compute_user_objective(original_lp_, repaired_obj); - f_t lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string user_gap = user_mip_gap(obj, lower); - - settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - obj, - lower, - user_gap.c_str(), - toc(exploration_stats_.start_time)); + report_heuristic(repaired_obj); if (settings_.solution_callback != nullptr) { std::vector original_x; @@ -443,30 +435,24 @@ void branch_and_bound_t::repair_heuristic_solutions() } template -mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t& solution, - f_t lower_bound) +void branch_and_bound_t::set_final_solution(mip_solution_t& solution, + f_t lower_bound) { - mip_status_t mip_status = mip_status_t::UNSET; - - if (solver_status_ == mip_exploration_status_t::NUMERICAL) { + if (solver_status_ == mip_status_t::NUMERICAL) { settings_.log.printf("Numerical issue encountered. Stopping the solver...\n"); - mip_status = mip_status_t::NUMERICAL; } - if (solver_status_ == mip_exploration_status_t::TIME_LIMIT) { + if (solver_status_ == mip_status_t::TIME_LIMIT) { settings_.log.printf("Time limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::TIME_LIMIT; } - if (solver_status_ == mip_exploration_status_t::NODE_LIMIT) { + if (solver_status_ == mip_status_t::NODE_LIMIT) { settings_.log.printf("Node limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::NODE_LIMIT; } - f_t upper_bound = get_upper_bound(); - f_t gap = upper_bound - lower_bound; - f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t gap = upper_bound_ - lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); - f_t gap_rel = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; settings_.log.printf("Explored %d nodes in %.2fs.\n", @@ -479,7 +465,7 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t 0 && gap <= settings_.absolute_mip_gap_tol) { settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", settings_.absolute_mip_gap_tol); @@ -494,18 +480,18 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t 0 && exploration_stats_.nodes_unexplored == 0 && - upper_bound == inf) { + upper_bound_ == inf) { settings_.log.printf("Integer infeasible.\n"); - mip_status = mip_status_t::INFEASIBLE; + solver_status_ = mip_status_t::INFEASIBLE; if (settings_.heuristic_preemption_callback != nullptr) { settings_.heuristic_preemption_callback(); } } } - if (upper_bound != inf) { + if (upper_bound_ != inf) { assert(incumbent_.has_incumbent); uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); } @@ -513,39 +499,25 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type) + bnb_worker_type_t thread_type) { - bool send_solution = false; - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; + bool send_solution = false; + + settings_.log.debug("%s found a feasible solution with obj=%.10e.\n", + feasible_solution_symbol(thread_type), + compute_user_objective(original_lp_, leaf_objective)); mutex_upper_.lock(); if (leaf_objective < upper_bound_) { incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); - upper_bound_ = leaf_objective; - f_t lower_bound = get_lower_bound(); - f_t obj = compute_user_objective(original_lp_, upper_bound_); - f_t lower = compute_user_objective(original_lp_, lower_bound); - settings_.log.printf( - "%s%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", - feasible_solution_symbol(thread_type), - nodes_explored, - nodes_unexplored, - obj, - lower, - leaf_depth, - nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0, - user_mip_gap(obj, lower).c_str(), - toc(exploration_stats_.start_time)); - + upper_bound_ = leaf_objective; + report(feasible_solution_symbol(thread_type), leaf_objective, get_lower_bound(), leaf_depth); send_solution = true; } @@ -557,16 +529,18 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, mutex_upper_.unlock(); } -template -rounding_direction_t branch_and_bound_t::child_selection(mip_node_t* node_ptr) +// Martin's criteria for the preferred rounding direction (see [1]) +// [1] A. Martin, “Integer Programs with Block Structure,” +// Technische Universit¨at Berlin, Berlin, 1999. Accessed: Aug. 08, 2025. +// [Online]. Available: https://opus4.kobv.de/opus4-zib/frontdoor/index/index/docId/391 +template +rounding_direction_t martin_criteria(f_t val, f_t root_val) { - const i_t branch_var = node_ptr->get_down_child()->branch_var; - const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; - const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); - const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); - const f_t down_dist = branch_var_val - down_val; - const f_t up_dist = up_val - branch_var_val; - constexpr f_t eps = 1e-6; + const f_t down_val = std::floor(root_val); + const f_t up_val = std::ceil(root_val); + const f_t down_dist = val - down_val; + const f_t up_dist = up_val - val; + constexpr f_t eps = 1e-6; if (down_dist < up_dist + eps) { return rounding_direction_t::DOWN; @@ -577,128 +551,193 @@ rounding_direction_t branch_and_bound_t::child_selection(mip_node_t -node_solve_info_t branch_and_bound_t::solve_node( +branch_variable_t branch_and_bound_t::variable_selection( mip_node_t* node_ptr, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - thread_type_t thread_type, - bool recompute_bounds_and_basis, - const std::vector& root_lower, - const std::vector& root_upper, - logger_t& log) + const std::vector& fractional, + bnb_worker_data_t* worker_data) { - const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; - const f_t upper_bound = get_upper_bound(); + logger_t log; + log.log = false; + i_t branch_var = -1; + rounding_direction_t round_dir = rounding_direction_t::NONE; + std::vector current_incumbent; + std::vector& solution = worker_data->leaf_solution.x; + + switch (worker_data->worker_type) { + case bnb_worker_type_t::EXPLORATION: + + // RINS/SubMIP path + if (!enable_concurrent_lp_root_solve()) { + branch_var = pc_.variable_selection(fractional, solution, log); + } else { + branch_var = pc_.reliable_variable_selection(worker_data->leaf_problem, + settings_, + var_types_, + node_ptr->vstatus, + worker_data->leaf_edge_norms, + fractional, + solution, + worker_data->basis_factors, + worker_data->basic_list, + worker_data->nonbasic_list, + node_ptr->lower_bound, + upper_bound_, + exploration_stats_.total_lp_iters, + exploration_stats_.nodes_explored, + log); + } + + round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + // Note that the exploration thread is the only one that can insert new nodes into the heap, + // and thus, we only need to calculate the objective estimate here (it is used for + // sorting the nodes for diving). + node_ptr->objective_estimate = + pc_.obj_estimate(fractional, solution, node_ptr->lower_bound, log); + return {branch_var, round_dir}; + + case bnb_worker_type_t::COEFFICIENT_DIVING: + return coefficient_diving(original_lp_, fractional, solution, log); + + case bnb_worker_type_t::LINE_SEARCH_DIVING: + return line_search_diving(fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::PSEUDOCOST_DIVING: + return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::GUIDED_DIVING: + mutex_upper_.lock(); + current_incumbent = incumbent_.x; + mutex_upper_.unlock(); + return guided_diving(pc_, fractional, solution, current_incumbent, log); + + default: + log.debug("Unknown variable selection method: %d\n", worker_data->worker_type); + return {-1, rounding_direction_t::NONE}; + } +} + +template +dual::status_t branch_and_bound_t::solve_node_lp(mip_node_t* node_ptr, + bnb_worker_data_t* worker_data, + bnb_stats_t& stats, + logger_t& log) +{ std::vector& leaf_vstatus = node_ptr->vstatus; - assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(leaf_vstatus.size() == worker_data->leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings_.dual_tol; + lp_settings.cut_off = upper_bound_ + settings_.dual_tol; lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; + if (worker_data->worker_type != bnb_worker_type_t::EXPLORATION) { + i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + f_t factor = settings_.diving_settings.iteration_limit_factor; + f_t max_iter = factor * bnb_lp_iters; + lp_settings.iteration_limit = max_iter - stats.total_lp_iters; + if (lp_settings.iteration_limit <= 0) { return dual::status_t::ITERATION_LIMIT; } + } + #ifdef LOG_NODE_SIMPLEX lp_settings.set_log(true); std::stringstream ss; ss << "simplex-" << std::this_thread::get_id() << ".log"; std::string logname; ss >> logname; - lp_settings.set_log_filename(logname); - lp_settings.log.enable_log_to_file("a+"); + lp_settings.log.set_log_file(logname, "a"); lp_settings.log.log_to_console = false; lp_settings.log.printf( - "%s node id = %d, branch var = %d, fractional val = %f, variable lower bound = %f, variable " - "upper bound = %f\n", + "%scurrent node: id = %d, depth = %d, branch var = %d, branch dir = %s, fractional val = " + "%f, variable lower bound = %f, variable upper bound = %f, branch vstatus = %d\n\n", settings_.log.log_prefix.c_str(), node_ptr->node_id, + node_ptr->depth, node_ptr->branch_var, + node_ptr->branch_dir == rounding_direction_t::DOWN ? "DOWN" : "UP", node_ptr->fractional_val, node_ptr->branch_var_lower, - node_ptr->branch_var_upper); + node_ptr->branch_var_upper, + node_ptr->vstatus[node_ptr->branch_var]); #endif - // Reset the bound_changed markers - std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - - // Set the correct bounds for the leaf problem - if (recompute_bounds_and_basis) { - leaf_problem.lower = root_lower; - leaf_problem.upper = root_upper; - node_ptr->get_variable_bounds( - leaf_problem.lower, leaf_problem.upper, node_presolver.bounds_changed); - - } else { - node_ptr->update_branched_variable_bounds( - leaf_problem.lower, leaf_problem.upper, node_presolver.bounds_changed); - } - - bool feasible = - node_presolver.bounds_strengthening(leaf_problem.lower, leaf_problem.upper, lp_settings); + bool is_feasible = worker_data->set_lp_variable_bounds_for(node_ptr, settings_); + dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + worker_data->leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; - - if (feasible) { - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + if (is_feasible) { + i_t node_iter = 0; + f_t lp_start_time = tic(); lp_status = dual_phase2_with_advanced_basis(2, 0, - recompute_bounds_and_basis, + worker_data->recompute_basis, lp_start_time, - leaf_problem, + worker_data->leaf_problem, lp_settings, leaf_vstatus, - basis_factors, - basic_list, - nonbasic_list, - leaf_solution, + worker_data->basis_factors, + worker_data->basic_list, + worker_data->nonbasic_list, + worker_data->leaf_solution, node_iter, - leaf_edge_norms); + worker_data->leaf_edge_norms); if (lp_status == dual::status_t::NUMERICAL) { log.printf("Numerical issue node %d. Resolving from scratch.\n", node_ptr->node_id); - lp_status_t second_status = solve_linear_program_with_advanced_basis(leaf_problem, - lp_start_time, - lp_settings, - leaf_solution, - basis_factors, - basic_list, - nonbasic_list, - leaf_vstatus, - leaf_edge_norms); + lp_status_t second_status = + solve_linear_program_with_advanced_basis(worker_data->leaf_problem, + lp_start_time, + lp_settings, + worker_data->leaf_solution, + worker_data->basis_factors, + worker_data->basic_list, + worker_data->nonbasic_list, + leaf_vstatus, + worker_data->leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } - if (thread_type == thread_type_t::EXPLORATION) { - exploration_stats_.total_lp_solve_time += toc(lp_start_time); - exploration_stats_.total_lp_iters += node_iter; - } + stats.total_lp_solve_time += toc(lp_start_time); + stats.total_lp_iters += node_iter; } +#ifdef LOG_NODE_SIMPLEX + lp_settings.log.printf("\nLP status: %d\n\n", lp_status); +#endif + + return lp_status; +} +template +std::pair branch_and_bound_t::update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + bnb_worker_data_t* worker_data, + dual::status_t lp_status, + logger_t& log) +{ + const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + std::vector& leaf_vstatus = node_ptr->vstatus; + lp_problem_t& leaf_problem = worker_data->leaf_problem; + lp_solution_t& leaf_solution = worker_data->leaf_solution; + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch node_ptr->lower_bound = inf; search_tree.graphviz_node(log, node_ptr, "infeasible", 0.0); search_tree.update(node_ptr, node_status_t::INFEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INFEASIBLE, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::CUTOFF) { // Node was cut off. Do not branch - node_ptr->lower_bound = upper_bound; + node_ptr->lower_bound = upper_bound_; f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); search_tree.graphviz_node(log, node_ptr, "cut off", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::OPTIMAL) { // LP was feasible @@ -711,48 +750,42 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); pc_.update_pseudo_costs(node_ptr, leaf_objective); - if (settings_.node_processed_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); - settings_.node_processed_callback(original_x, leaf_objective); + if (worker_data->worker_type == bnb_worker_type_t::EXPLORATION) { + if (settings_.node_processed_callback != nullptr) { + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); + settings_.node_processed_callback(original_x, leaf_objective); + } } if (leaf_num_fractional == 0) { // Found a integer feasible solution - add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); + add_feasible_solution( + leaf_objective, leaf_solution.x, node_ptr->depth, worker_data->worker_type); search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); search_tree.update(node_ptr, node_status_t::INTEGER_FEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INTEGER_FEASIBLE, rounding_direction_t::NONE}; - } else if (leaf_objective <= upper_bound + abs_fathom_tol) { + } else if (leaf_objective <= upper_bound_ + abs_fathom_tol) { // Choose fractional variable to branch on - const i_t branch_var = - pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); + auto [branch_var, round_dir] = variable_selection(node_ptr, leaf_fractional, worker_data); assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(branch_var >= 0); + assert(round_dir != rounding_direction_t::NONE); + search_tree.branch( node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, leaf_problem, log); search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); - - rounding_direction_t round_dir = child_selection(node_ptr); - - if (round_dir == rounding_direction_t::UP) { - return node_solve_info_t::UP_CHILD_FIRST; - } else { - return node_solve_info_t::DOWN_CHILD_FIRST; - } + return {node_status_t::HAS_CHILDREN, round_dir}; } else { search_tree.graphviz_node(log, node_ptr, "fathomed", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } - } else if (lp_status == dual::status_t::TIME_LIMIT) { - search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); - return node_solve_info_t::TIME_LIMIT; - } else { - if (thread_type == thread_type_t::EXPLORATION) { + if (worker_data->worker_type == bnb_worker_type_t::EXPLORATION) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " @@ -765,149 +798,24 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "numerical", 0.0); search_tree.update(node_ptr, node_status_t::NUMERICAL); - return node_solve_info_t::NUMERICAL; - } -} - -template -void branch_and_bound_t::exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, - i_t initial_heap_size) -{ - if (solver_status_ != mip_exploration_status_t::RUNNING) { return; } - - // Note that we do not know which thread will execute the - // `exploration_ramp_up` task, so we allow to any thread - // to repair the heuristic solution. - repair_heuristic_solutions(); - - f_t lower_bound = node->lower_bound; - f_t upper_bound = get_upper_bound(); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - f_t abs_gap = upper_bound - lower_bound; - - if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); - search_tree->update(node, node_status_t::FATHOMED); - --exploration_stats_.nodes_unexplored; - return; - } - - f_t now = toc(exploration_stats_.start_time); - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 10 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - (time_since_last_log >= 1)) || - (time_since_last_log > 30) || now > settings_.time_limit) { - bool should_report = should_report_.exchange(false); - - if (should_report) { - f_t obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, root_objective_); - std::string gap_user = user_mip_gap(obj, user_lower); - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; - - settings_.log.printf( - " %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", - nodes_explored, - nodes_unexplored, - obj, - user_lower, - node->depth, - nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0, - gap_user.c_str(), - now); - - exploration_stats_.nodes_since_last_log = 0; - exploration_stats_.last_log = tic(); - should_report_ = true; - } - } - - if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; - } - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; - - node_solve_info_t status = solve_node(node, - *search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - true, - original_lp_.lower, - original_lp_.upper, - settings_.log); - - ++exploration_stats_.nodes_since_last_log; - ++exploration_stats_.nodes_explored; - --exploration_stats_.nodes_unexplored; - - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; - - } else if (has_children(status)) { - exploration_stats_.nodes_unexplored += 2; - - // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase - if (exploration_stats_.nodes_unexplored < initial_heap_size) { -#pragma omp task - exploration_ramp_up(node->get_down_child(), search_tree, Arow, initial_heap_size); - -#pragma omp task - exploration_ramp_up(node->get_up_child(), search_tree, Arow, initial_heap_size); - - } else { - // We've generated enough nodes, push further nodes onto the heap - mutex_heap_.lock(); - heap_.push(node->get_down_child()); - heap_.push(node->get_up_child()); - mutex_heap_.unlock(); - } + return {node_status_t::NUMERICAL, rounding_direction_t::NONE}; } } template -void branch_and_bound_t::explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) +void branch_and_bound_t::plunge_with(bnb_worker_data_t* worker_data) { - bool recompute_bounds_and_basis = true; std::deque*> stack; - stack.push_front(start_node); - - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { - if (task_id == 0) { repair_heuristic_solutions(); } + stack.push_front(worker_data->start_node); + worker_data->recompute_basis = true; + worker_data->recompute_bounds = true; + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET) { mip_node_t* node_ptr = stack.front(); stack.pop_front(); f_t lower_bound = node_ptr->lower_bound; - f_t upper_bound = get_upper_bound(); - f_t abs_gap = upper_bound - lower_bound; + f_t upper_bound = upper_bound_; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); // This is based on three assumptions: @@ -916,80 +824,48 @@ void branch_and_bound_t::explore_subtree(i_t task_id, // - The current node and its siblings uses the lower bound of the parent before solving the LP // relaxation // - The lower bound of the parent is lower or equal to its children - assert(task_id < local_lower_bounds_.size()); - local_lower_bounds_[task_id] = lower_bound; + worker_data->lower_bound = lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); - search_tree.update(node_ptr, node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree_.update(node_ptr, node_status_t::FATHOMED); + worker_data->recompute_basis = true; + worker_data->recompute_bounds = true; --exploration_stats_.nodes_unexplored; continue; } - f_t now = toc(exploration_stats_.start_time); - - if (task_id == 0) { - f_t time_since_last_log = - exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); - - if (((exploration_stats_.nodes_since_last_log >= 1000 || - abs_gap < 10 * settings_.absolute_mip_gap_tol) && - time_since_last_log >= 1) || - (time_since_last_log > 30) || now > settings_.time_limit) { - f_t obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string gap_user = user_mip_gap(obj, user_lower); - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; - - settings_.log.printf( - " %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", - nodes_explored, - nodes_unexplored, - obj, - user_lower, - node_ptr->depth, - nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0, - gap_user.c_str(), - now); - exploration_stats_.last_log = tic(); - exploration_stats_.nodes_since_last_log = 0; - } + if (toc(exploration_stats_.start_time) > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; } - if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; - } if (exploration_stats_.nodes_explored >= settings_.node_limit) { - solver_status_ = mip_exploration_status_t::NODE_LIMIT; - return; + solver_status_ = mip_status_t::NODE_LIMIT; + break; } - node_solve_info_t status = solve_node(node_ptr, - search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - recompute_bounds_and_basis, - original_lp_.lower, - original_lp_.upper, - settings_.log); - - recompute_bounds_and_basis = !has_children(status); + dual::status_t lp_status = + solve_node_lp(node_ptr, worker_data, exploration_stats_, settings_.log); + + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + auto [node_status, round_dir] = + update_tree(node_ptr, search_tree_, worker_data, lp_status, settings_.log); + + worker_data->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker_data->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; - } else if (has_children(status)) { + if (node_status == node_status_t::HAS_CHILDREN) { // The stack should only contain the children of the current parent. // If the stack size is greater than 0, // we pop the current node from the stack and place it in the global heap, @@ -997,217 +873,109 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); - - // The order here matters. We want to create a copy of the node - // before adding to the global heap. Otherwise, - // some thread may consume the node (possibly fathoming it) - // before we had the chance to add to the diving queue. - // This lead to a SIGSEGV. Although, in this case, it - // would be better if we discard the node instead. - if (get_heap_size() > settings_.num_bfs_threads) { - std::vector lower = original_lp_.lower; - std::vector upper = original_lp_.upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - mutex_dive_queue_.lock(); - diving_queue_.emplace(node->detach_copy(), std::move(lower), std::move(upper)); - mutex_dive_queue_.unlock(); - } - - mutex_heap_.lock(); - heap_.push(node); - mutex_heap_.unlock(); + node_queue_.push(node); } exploration_stats_.nodes_unexplored += 2; - if (status == node_solve_info_t::UP_CHILD_FIRST) { - stack.push_front(node_ptr->get_down_child()); + if (round_dir == rounding_direction_t::UP) { + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_down_child()); + } else { + stack.push_front(node_ptr->get_down_child()); + } + stack.push_front(node_ptr->get_up_child()); } else { - stack.push_front(node_ptr->get_up_child()); + if (node_queue_.best_first_queue_size() < min_node_queue_size_) { + node_queue_.push(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + } + stack.push_front(node_ptr->get_down_child()); } } } + + worker_pool_.return_worker_to_pool(worker_data); + active_workers_per_type[EXPLORATION]--; } template -void branch_and_bound_t::best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow) +void branch_and_bound_t::dive_with(bnb_worker_data_t* worker_data) { - f_t lower_bound = -inf; - f_t upper_bound = inf; - f_t abs_gap = inf; - f_t rel_gap = inf; - - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; - - while (solver_status_ == mip_exploration_status_t::RUNNING && - abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - mip_node_t* start_node = nullptr; - - // If there any node left in the heap, we pop the top node and explore it. - mutex_heap_.lock(); - if (heap_.size() > 0) { - start_node = heap_.top(); - heap_.pop(); - active_subtrees_++; - } - mutex_heap_.unlock(); - - if (start_node != nullptr) { - if (get_upper_bound() < start_node->lower_bound) { - // This node was put on the heap earlier but its lower bound is now greater than the - // current upper bound - search_tree.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); - search_tree.update(start_node, node_status_t::FATHOMED); - active_subtrees_--; - continue; - } + logger_t log; + log.log = false; - // Best-first search with plunging - explore_subtree(task_id, - start_node, - search_tree, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); - - active_subtrees_--; - } + bnb_worker_type_t diving_type = worker_data->worker_type; + const i_t node_limit = settings_.diving_settings.node_limit; + const i_t backtrack = settings_.diving_settings.backtrack; - lower_bound = get_lower_bound(); - upper_bound = get_upper_bound(); - abs_gap = upper_bound - lower_bound; - rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); - } + worker_data->recompute_basis = true; + worker_data->recompute_bounds = true; - // Check if it is the last thread that exited the loop and no - // timeout or numerical error has happen. - if (solver_status_ == mip_exploration_status_t::RUNNING) { - if (active_subtrees_ == 0) { - solver_status_ = mip_exploration_status_t::COMPLETED; - } else { - local_lower_bounds_[task_id] = inf; - } - } -} + search_tree_t dive_tree(std::move(*worker_data->start_node)); + std::deque*> stack; + stack.push_front(&dive_tree.root); -template -void branch_and_bound_t::diving_thread(const csr_matrix_t& Arow) -{ - logger_t log; - log.log = false; - // Make a copy of the original LP. We will modify its bounds at each leaf - lp_problem_t leaf_problem = original_lp_; - std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); - - const i_t m = leaf_problem.num_rows; - basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); - std::vector basic_list(m); - std::vector nonbasic_list; - - while (solver_status_ == mip_exploration_status_t::RUNNING && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - std::optional> start_node; - - mutex_dive_queue_.lock(); - if (diving_queue_.size() > 0) { start_node = diving_queue_.pop(); } - mutex_dive_queue_.unlock(); - - if (start_node.has_value()) { - if (get_upper_bound() < start_node->node.lower_bound) { continue; } - - bool recompute_bounds_and_basis = true; - i_t nodes_explored = 0; - search_tree_t subtree(std::move(start_node->node)); - std::deque*> stack; - stack.push_front(&subtree.root); - - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { - mip_node_t* node_ptr = stack.front(); - stack.pop_front(); - f_t upper_bound = get_upper_bound(); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); - - if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds_and_basis = true; - continue; - } + bnb_stats_t dive_stats; + dive_stats.total_lp_iters = 0; + dive_stats.total_lp_solve_time = 0; + dive_stats.nodes_explored = 0; + dive_stats.nodes_unexplored = 0; - if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; } + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET) { + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); - if (nodes_explored >= 1000) { break; } + f_t lower_bound = node_ptr->lower_bound; + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + worker_data->lower_bound = lower_bound; - node_solve_info_t status = solve_node(node_ptr, - subtree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::DIVING, - recompute_bounds_and_basis, - start_node->lower, - start_node->upper, - log); + if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + worker_data->recompute_basis = true; + worker_data->recompute_bounds = true; + continue; + } - nodes_explored++; + if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } + if (dive_stats.nodes_explored > node_limit) { break; } - recompute_bounds_and_basis = !has_children(status); + dual::status_t lp_status = solve_node_lp(node_ptr, worker_data, dive_stats, log); - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } - } else if (has_children(status)) { - if (status == node_solve_info_t::UP_CHILD_FIRST) { - stack.push_front(node_ptr->get_down_child()); - stack.push_front(node_ptr->get_up_child()); - } else { - stack.push_front(node_ptr->get_up_child()); - stack.push_front(node_ptr->get_down_child()); - } - } + ++dive_stats.nodes_explored; - if (stack.size() > 1) { - // If the diving thread is consuming the nodes faster than the - // best first search, then we split the current subtree at the - // lowest possible point and move to the queue, so it can - // be picked by another thread. - if (std::lock_guard lock(mutex_dive_queue_); - diving_queue_.size() < min_diving_queue_size_) { - mip_node_t* new_node = stack.back(); - stack.pop_back(); - - std::vector lower = start_node->lower; - std::vector upper = start_node->upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - new_node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - diving_queue_.emplace(new_node->detach_copy(), std::move(lower), std::move(upper)); - } - } + auto [node_status, round_dir] = update_tree(node_ptr, dive_tree, worker_data, lp_status, log); + + worker_data->recompute_basis = node_status != node_status_t::HAS_CHILDREN; + worker_data->recompute_bounds = node_status != node_status_t::HAS_CHILDREN; + + if (node_status == node_status_t::HAS_CHILDREN) { + if (round_dir == rounding_direction_t::UP) { + stack.push_front(node_ptr->get_down_child()); + stack.push_front(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + stack.push_front(node_ptr->get_down_child()); } } + + if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > backtrack) { + stack.pop_back(); + } } + + worker_pool_.return_worker_to_pool(worker_data); + active_workers_per_type[diving_type]--; } template @@ -1291,9 +1059,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut logger_t log; log.log = false; log.log_prefix = settings_.log.log_prefix; - solver_status_ = mip_exploration_status_t::UNSET; + solver_status_ = mip_status_t::UNSET; + is_running = false; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; + original_lp_.A.to_compressed_row(Arow_); if (guess_.size() != 0) { std::vector crushed_guess; @@ -1355,15 +1125,15 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (root_status == lp_status_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, -inf); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, -inf); + return solver_status_; } assert(root_vstatus_.size() == original_lp_.num_cols); set_uninitialized_steepest_edge_norms(edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -1421,8 +1191,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut pc_); if (toc(exploration_stats_.start_time) > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, root_objective_); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; } // Choose variable to branch on @@ -1437,62 +1208,185 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, original_lp_, log); + node_queue_.push(search_tree_.root.get_down_child()); + node_queue_.push(search_tree_.root.get_up_child()); + + diving_heuristics_settings_t diving_settings = settings_.diving_settings; + bool is_ramp_up_finished = false; - csr_matrix_t Arow(1, 1, 0); - original_lp_.A.to_compressed_row(Arow); + std::vector worker_types = {EXPLORATION}; + std::array max_num_workers_per_type; + max_num_workers_per_type.fill(0); + max_num_workers_per_type[EXPLORATION] = settings_.num_threads; + worker_pool_.init(2 * settings_.num_threads, original_lp_, Arow_, var_types_, settings_); + active_workers_per_type.fill(0); settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n", settings_.num_threads, - settings_.num_bfs_threads, - settings_.num_diving_threads); - - settings_.log.printf( - " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " - "| Time |\n"); + settings_.num_bfs_workers, + settings_.num_threads - settings_.num_bfs_workers); - exploration_stats_.nodes_explored = 0; + exploration_stats_.nodes_explored = 1; exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; - exploration_stats_.last_log = tic(); - active_subtrees_ = 0; - min_diving_queue_size_ = 4 * settings_.num_diving_threads; - solver_status_ = mip_exploration_status_t::RUNNING; + exploration_stats_.last_log = 0.0; + is_running = true; lower_bound_ceiling_ = inf; - should_report_ = true; + min_node_queue_size_ = 2 * settings_.num_threads; + + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " + "| Time |\n"); #pragma omp parallel num_threads(settings_.num_threads) { #pragma omp master { - auto down_child = search_tree_.root.get_down_child(); - auto up_child = search_tree_.root.get_up_child(); - i_t initial_size = 2 * settings_.num_threads; + f_t lower_bound = get_lower_bound(); + f_t abs_gap = upper_bound_ - lower_bound; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + i_t last_node_depth = 0; + + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_workers_per_type[0] > 0 || node_queue_.best_first_queue_size() > 0)) { + bool launched_any_task = false; + lower_bound = get_lower_bound(); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); + + repair_heuristic_solutions(); + + if (!is_ramp_up_finished) { + if (node_queue_.best_first_queue_size() >= min_node_queue_size_) { + if (!std::isfinite(upper_bound_)) { diving_settings.disable_guided_diving = true; } + max_num_workers_per_type = + bnb_get_num_workers_round_robin(settings_.num_threads, diving_settings); + worker_types = bnb_get_worker_types(diving_settings); + is_ramp_up_finished = true; + +#ifdef CUOPT_LOG_DEBUG + settings_.log.debug( + "Ramp-up phase is finished. num active workers = %d, heap size = %d\n", + active_workers_per_type[EXPLORATION], + node_queue_.best_first_queue_size()); + + for (auto type : worker_types) { + settings_.log.debug("%s: max num of workers = %d", + feasible_solution_symbol(type), + max_num_workers_per_type[type]); + } +#endif + } + } -#pragma omp task - exploration_ramp_up(down_child, &search_tree_, Arow, initial_size); + // If the guided diving was disabled previously due to the lack of an incumbent solution, + // re-enable as soon as a new incumbent is found. + if (settings_.diving_settings.disable_guided_diving != + diving_settings.disable_guided_diving) { + if (std::isfinite(upper_bound_)) { + diving_settings.disable_guided_diving = settings_.diving_settings.disable_guided_diving; + max_num_workers_per_type = + bnb_get_num_workers_round_robin(settings_.num_threads, diving_settings); + worker_types = bnb_get_worker_types(diving_settings); + +#ifdef CUOPT_LOG_DEBUG + for (auto type : worker_types) { + settings_.log.debug("%s: max num of workers = %d", + feasible_solution_symbol(type), + max_num_workers_per_type[type]); + } +#endif + } + } -#pragma omp task - exploration_ramp_up(up_child, &search_tree_, Arow, initial_size); - } + f_t now = toc(exploration_stats_.start_time); + f_t time_since_last_log = + exploration_stats_.last_log == 0 ? 1.0 : toc(exploration_stats_.last_log); + i_t nodes_since_last_log = exploration_stats_.nodes_since_last_log; + + if (((nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + time_since_last_log >= 1) || + (time_since_last_log > 30) || now > settings_.time_limit) { + i_t depth = node_queue_.best_first_queue_size() > 0 ? node_queue_.bfs_top()->depth + : last_node_depth; + report(" ", upper_bound_, lower_bound, depth); + exploration_stats_.last_log = tic(); + exploration_stats_.nodes_since_last_log = 0; + } -#pragma omp barrier + if (now > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } -#pragma omp master - { - for (i_t i = 0; i < settings_.num_bfs_threads; i++) { -#pragma omp task - best_first_thread(i, search_tree_, Arow); - } + for (auto type : worker_types) { + if (active_workers_per_type[type] >= max_num_workers_per_type[type]) { continue; } + + // Get an idle worker. + bnb_worker_data_t* worker = worker_pool_.get_idle_worker(); + if (worker == nullptr) { break; } + + if (type == EXPLORATION) { + // If there any node left in the heap, we pop the top node and explore it. + std::optional*> start_node = node_queue_.pop_best_first(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound) { + // This node was put on the heap earlier but its lower bound is now greater than the + // current upper bound + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); + continue; + } + + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + worker->init_best_first(start_node.value(), original_lp_); + last_node_depth = start_node.value()->depth; + active_workers_per_type[type]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + plunge_with(worker); + + } else { + std::optional*> start_node = node_queue_.pop_diving(); + + if (!start_node.has_value()) { continue; } + if (upper_bound_ < start_node.value()->lower_bound || + start_node.value()->depth < diving_settings.min_node_depth) { + continue; + } + + bool is_feasible = + worker->init_diving(start_node.value(), type, original_lp_, settings_); + if (!is_feasible) { continue; } + + // Remove the worker from the idle list. + worker_pool_.pop_idle_worker(); + active_workers_per_type[type]++; + launched_any_task = true; + +#pragma omp task affinity(worker) + dive_with(worker); + } + } - for (i_t i = 0; i < settings_.num_diving_threads; i++) { -#pragma omp task - diving_thread(Arow); + // If no new task was launched in this iteration, suspend temporarily the + // execution of the master. As of 8/Jan/2026, GCC does not + // implement taskyield, but LLVM does. + if (!launched_any_task) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); } } } } - f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree_.root.lower_bound; - return set_final_solution(solution, lower_bound); + is_running = false; + f_t lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() + : search_tree_.root.lower_bound; + set_final_solution(solution, lower_bound); + return solver_status_; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 38438cc9e..c4836b356 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -1,15 +1,17 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once -#include +#include +#include #include #include +#include #include #include #include @@ -20,7 +22,6 @@ #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { @@ -35,45 +36,12 @@ enum class mip_status_t { UNSET = 6, // The status is not set }; -enum class mip_exploration_status_t { - UNSET = 0, // The status is not set - TIME_LIMIT = 1, // The solver reached a time limit - NODE_LIMIT = 2, // The maximum number of nodes was reached (not implemented) - NUMERICAL = 3, // The solver encountered a numerical error - RUNNING = 4, // The solver is currently exploring the tree - COMPLETED = 5, // The solver finished exploring the tree -}; - -enum class node_solve_info_t { - NO_CHILDREN = 0, // The node does not produced children - UP_CHILD_FIRST = 1, // The up child should be explored first - DOWN_CHILD_FIRST = 2, // The down child should be explored first - TIME_LIMIT = 3, // The solver reached a time limit - ITERATION_LIMIT = 4, // The solver reached a iteration limit - NUMERICAL = 5 // The solver encounter a numerical error when solving the node -}; - -// Indicate the search and variable selection algorithms used by the thread (See [1]). -// -// [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, -// Berlin, 2007. doi: 10.14279/depositonce-1634. -enum class thread_type_t { - EXPLORATION = 0, // Best-First + Plunging. Pseudocost branching + Martin's criteria. - DIVING = 1, -}; - -template -class bounds_strengthening_t; - template void upper_bound_callback(f_t upper_bound); template class branch_and_bound_t { public: - template - using mip_node_heap_t = std::priority_queue, node_compare_t>; - branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings); @@ -109,9 +77,7 @@ class branch_and_bound_t { f_t& repaired_obj, std::vector& repaired_solution) const; - f_t get_upper_bound(); f_t get_lower_bound(); - i_t get_heap_size(); bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -128,34 +94,22 @@ class branch_and_bound_t { std::vector guess_; // LP relaxation + csr_matrix_t Arow_; lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; - // Local lower bounds for each thread - std::vector> local_lower_bounds_; - // Mutex for upper bound omp_mutex_t mutex_upper_; // Global variable for upper bound - f_t upper_bound_; + omp_atomic_t upper_bound_; // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; // Structure with the general info of the solver. - struct stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; - } exploration_stats_; + bnb_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -175,86 +129,74 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes to be explored. - omp_mutex_t mutex_heap_; - mip_node_heap_t*> heap_; + // Heap storing the nodes waiting to be explored. + node_queue_t node_queue_; // Search tree search_tree_t search_tree_; - // Count the number of subtrees that are currently being explored. - omp_atomic_t active_subtrees_; + // Count the number of workers per type that either are being executed or + // are waiting to be executed. + std::array, bnb_num_worker_types> active_workers_per_type; - // Queue for storing the promising node for performing dives. - omp_mutex_t mutex_dive_queue_; - diving_queue_t diving_queue_; - i_t min_diving_queue_size_; + // Worker pool + bnb_worker_pool_t worker_pool_; // Global status of the solver. - omp_atomic_t solver_status_; + omp_atomic_t solver_status_; + omp_atomic_t is_running{false}; - omp_atomic_t should_report_; + // Minimum number of node in the queue. When the queue size is less than + // this variable, the nodes are added directly to the queue instead of + // the local stack. This also determines the end of the ramp-up phase. + i_t min_node_queue_size_; // In case, a best-first thread encounters a numerical issue when solving a node, // its blocks the progression of the lower bound. omp_atomic_t lower_bound_ceiling_; + void report_heuristic(f_t obj); + void report(std::string symbol, f_t obj, f_t lower_bound, i_t node_depth); + // Set the final solution. - mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); + void set_final_solution(mip_solution_t& solution, f_t lower_bound); // Update the incumbent solution with the new feasible solution // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type); + bnb_worker_type_t thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); - // Ramp-up phase of the solver, where we greedily expand the tree until - // there is enough unexplored nodes. This is done recursively using OpenMP tasks. - void exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, - i_t initial_heap_size); - - // Explore the search tree using the best-first search with plunging strategy. - void explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); - - // Each "main" thread pops a node from the global heap and then performs a plunge - // (i.e., a shallow dive) into the subtree determined by the node. - void best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow); - - // Each diving thread pops the first node from the dive queue and then performs - // a deep dive into the subtree determined by the node. - void diving_thread(const csr_matrix_t& Arow); - - // Solve the LP relaxation of a leaf node and update the tree. - node_solve_info_t solve_node(mip_node_t* node_ptr, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list, - bounds_strengthening_t& node_presolver, - thread_type_t thread_type, - bool recompute_basis_and_bounds, - const std::vector& root_lower, - const std::vector& root_upper, + // Perform a plunge over a subtree using a given worker. + void plunge_with(bnb_worker_data_t* worker_data); + + // Perform a deep dive over a subtree using a given worker. + void dive_with(bnb_worker_data_t* worker_data); + + // Solve the LP relaxation of a leaf node + dual::status_t solve_node_lp(mip_node_t* node_ptr, + bnb_worker_data_t* worker_data, + bnb_stats_t& stats, logger_t& log); - // Sort the children based on the Martin's criteria. - rounding_direction_t child_selection(mip_node_t* node_ptr); + // Update the tree based on the LP relaxation. Returns the status + // of the node and, if appropriated, the preferred rounding direction + // when visiting the children. + std::pair update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + bnb_worker_data_t* worker_data, + dual::status_t lp_status, + logger_t& log); + + // Selects the variable to branch on. + branch_variable_t variable_selection(mip_node_t* node_ptr, + const std::vector& fractional, + bnb_worker_data_t* worker_data); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 23d9a0e8e..8ee3fb0ce 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -785,8 +785,15 @@ i_t primal_push(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair( - lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1132,7 +1139,15 @@ crossover_status_t crossover(const lp_problem_t& lp, rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1323,7 +1338,15 @@ crossover_status_t crossover(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/diving_heuristics.cpp b/cpp/src/dual_simplex/diving_heuristics.cpp new file mode 100644 index 000000000..b5e58a097 --- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.cpp @@ -0,0 +1,302 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + constexpr f_t eps = 1e-6; + i_t branch_var = -1; + f_t min_score = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + + for (auto j : fractional) { + f_t score = inf; + rounding_direction_t dir = rounding_direction_t::NONE; + + if (solution[j] < root_solution[j] - eps) { + f_t f = solution[j] - std::floor(solution[j]); + f_t d = root_solution[j] - solution[j]; + score = f / d; + dir = rounding_direction_t::DOWN; + + } else if (solution[j] > root_solution[j] + eps) { + f_t f = std::ceil(solution[j]) - solution[j]; + f_t d = solution[j] - root_solution[j]; + score = f / d; + dir = rounding_direction_t::UP; + } + + if (min_score > score) { + min_score = score; + branch_var = j; + round_dir = dir; + } + } + + // If the current solution is equal to the root solution, arbitrarily + // set the branch variable to the first fractional variable and round it down + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Line search diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + min_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (auto j : fractional) { + rounding_direction_t dir = rounding_direction_t::NONE; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + + pc.pseudo_cost_mutex[j].lock(); + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + pc.pseudo_cost_mutex[j].unlock(); + + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + f_t score = 0; + + if (solution[j] < root_solution[j] - 0.4) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (solution[j] > root_solution[j] + 0.4) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (f_down < 0.3) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (f_down > 0.7) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else { + score = score_up; + dir = rounding_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Pseudocost diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log) +{ + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (auto j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + rounding_direction_t dir = + down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; + + pc.pseudo_cost_mutex[j].lock(); + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + pc.pseudo_cost_mutex[j].unlock(); + + f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Guided diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +std::tuple calculate_variable_locks(const lp_problem_t& lp_problem, i_t var_idx) +{ + i_t up_lock = 0; + i_t down_lock = 0; + i_t start = lp_problem.A.col_start[var_idx]; + i_t end = lp_problem.A.col_start[var_idx + 1]; + + for (i_t k = start; k < end; ++k) { + f_t nz_val = lp_problem.A.x[k]; + i_t nz_row = lp_problem.A.i[k]; + + if (std::isfinite(lp_problem.upper[nz_row]) && std::isfinite(lp_problem.lower[nz_row])) { + down_lock += 1; + up_lock += 1; + continue; + } + + f_t sign = std::isfinite(lp_problem.upper[nz_row]) ? 1 : -1; + + if (nz_val * sign > 0) { + up_lock += 1; + } else { + down_lock += 1; + } + } + + return {up_lock, down_lock}; +} + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + logger_t& log) +{ + i_t branch_var = -1; + i_t min_locks = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + for (auto j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + auto [up_lock, down_lock] = calculate_variable_locks(lp_problem, j); + i_t locks = std::min(up_lock, down_lock); + + if (min_locks > locks) { + min_locks = locks; + branch_var = j; + + if (up_lock < down_lock) { + round_dir = rounding_direction_t::UP; + } else if (up_lock > down_lock) { + round_dir = rounding_direction_t::DOWN; + } else if (f_down < f_up + eps) { + round_dir = rounding_direction_t::DOWN; + } else { + round_dir = rounding_direction_t::UP; + } + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug( + "Coefficient diving: selected var %d with val = %e, round dir = %d and min locks = %d\n", + branch_var, + solution[branch_var], + round_dir, + min_locks); + + return {branch_var, round_dir}; +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +template branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + logger_t& log); +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_heuristics.hpp b/cpp/src/dual_simplex/diving_heuristics.hpp new file mode 100644 index 000000000..1f44fee31 --- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.hpp @@ -0,0 +1,49 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct branch_variable_t { + i_t variable; + rounding_direction_t direction; +}; + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + logger_t& log); + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_queue.hpp b/cpp/src/dual_simplex/diving_queue.hpp deleted file mode 100644 index f7035109e..000000000 --- a/cpp/src/dual_simplex/diving_queue.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include - -#include - -namespace cuopt::linear_programming::dual_simplex { - -template -struct diving_root_t { - mip_node_t node; - std::vector lower; - std::vector upper; - - diving_root_t(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - : node(std::move(node)), lower(std::move(lower)), upper(std::move(upper)) - { - } - - friend bool operator>(const diving_root_t& a, const diving_root_t& b) - { - return a.node.lower_bound > b.node.lower_bound; - } -}; - -// A min-heap for storing the starting nodes for the dives. -// This has a maximum size of 1024, such that the container -// will discard the least promising node if the queue is full. -template -class diving_queue_t { - private: - std::vector> buffer; - static constexpr i_t max_size_ = 1024; - - public: - diving_queue_t() { buffer.reserve(max_size_); } - - void push(diving_root_t&& node) - { - buffer.push_back(std::move(node)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - void emplace(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - { - buffer.emplace_back(std::move(node), std::move(lower), std::move(upper)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - diving_root_t pop() - { - std::pop_heap(buffer.begin(), buffer.end(), std::greater<>()); - diving_root_t node = std::move(buffer.back()); - buffer.pop_back(); - return node; - } - - i_t size() const { return buffer.size(); } - constexpr i_t max_size() const { return max_size_; } - const diving_root_t& top() const { return buffer.front(); } - void clear() { buffer.clear(); } -}; - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/logger.hpp b/cpp/src/dual_simplex/logger.hpp index ac5e394f9..f81308670 100644 --- a/cpp/src/dual_simplex/logger.hpp +++ b/cpp/src/dual_simplex/logger.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -30,22 +30,24 @@ class logger_t { { } - void enable_log_to_file(std::string mode = "w") + void enable_log_to_file(const char* mode = "w") { if (log_file != nullptr) { std::fclose(log_file); } - log_file = std::fopen(log_filename.c_str(), mode.c_str()); + log_file = std::fopen(log_filename.c_str(), mode); log_to_file = true; } - void set_log_file(const std::string& filename) + void set_log_file(const std::string& filename, const char* mode = "w") { log_filename = filename; - enable_log_to_file(); + enable_log_to_file(mode); } void close_log_file() { if (log_file != nullptr) { std::fclose(log_file); } + log_file = nullptr; + log_to_file = false; } void printf(const char* fmt, ...) diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 1d66a21f7..de147132a 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,6 +45,7 @@ class mip_node_t { branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), + objective_estimate(std::numeric_limits::infinity()), vstatus(0) { children[0] = nullptr; @@ -59,6 +60,7 @@ class mip_node_t { node_id(0), branch_var(-1), branch_dir(rounding_direction_t::NONE), + objective_estimate(std::numeric_limits::infinity()), vstatus(basis) { children[0] = nullptr; @@ -80,6 +82,7 @@ class mip_node_t { branch_var(branch_variable), branch_dir(branch_direction), fractional_val(branch_var_value), + objective_estimate(parent_node->objective_estimate), vstatus(basis) { @@ -227,17 +230,19 @@ class mip_node_t { mip_node_t detach_copy() const { mip_node_t copy(lower_bound, vstatus); - copy.branch_var = branch_var; - copy.branch_dir = branch_dir; - copy.branch_var_lower = branch_var_lower; - copy.branch_var_upper = branch_var_upper; - copy.fractional_val = fractional_val; - copy.node_id = node_id; + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.objective_estimate = objective_estimate; + copy.node_id = node_id; return copy; } node_status_t status; f_t lower_bound; + f_t objective_estimate; i_t depth; i_t node_id; i_t branch_var; @@ -262,22 +267,6 @@ void remove_fathomed_nodes(std::vector*>& stack) } } -template -class node_compare_t { - public: - bool operator()(const mip_node_t& a, const mip_node_t& b) const - { - return a.lower_bound > - b.lower_bound; // True if a comes before b, elements that come before are output last - } - - bool operator()(const mip_node_t* a, const mip_node_t* b) const - { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - } -}; - template class search_tree_t { public: diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp new file mode 100644 index 000000000..b5c9a9ea7 --- /dev/null +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -0,0 +1,165 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// This is a generic heap implementation based +// on the STL functions. The main benefit here is +// that we access the underlying container. +template +class heap_t { + public: + heap_t() = default; + virtual ~heap_t() = default; + + void push(const T& node) + { + buffer.push_back(node); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + void push(T&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + template + void emplace(Args&&... args) + { + buffer.emplace_back(std::forward(args)...); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + std::optional pop() + { + if (buffer.empty()) return std::nullopt; + + std::pop_heap(buffer.begin(), buffer.end(), comp); + T node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + size_t size() const { return buffer.size(); } + T& top() { return buffer.front(); } + void clear() { buffer.clear(); } + bool empty() const { return buffer.empty(); } + + private: + std::vector buffer; + Comp comp; +}; + +// A queue storing the nodes waiting to be explored/dived from. +template +class node_queue_t { + private: + struct heap_entry_t { + mip_node_t* node = nullptr; + f_t lower_bound = -inf; + f_t score = inf; + + heap_entry_t(mip_node_t* new_node) + : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) + { + } + }; + + // Comparision function for ordering the nodes based on their lower bound with + // lowest one being explored first. + struct lower_bound_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->lower_bound > b->lower_bound; + } + }; + + // Comparision function for ordering the nodes based on some score (currently the pseudocost + // estimate) with the lowest being explored first. + struct score_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->score > b->score; + } + }; + + heap_t, lower_bound_comp> best_first_heap; + heap_t, score_comp> diving_heap; + omp_mutex_t mutex; + + public: + void push(mip_node_t* new_node) + { + std::lock_guard lock(mutex); + auto entry = std::make_shared(new_node); + best_first_heap.push(entry); + diving_heap.push(entry); + } + + std::optional*> pop_best_first() + { + std::lock_guard lock(mutex); + auto entry = best_first_heap.pop(); + + if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } + + return std::nullopt; + } + + std::optional*> pop_diving() + { + std::lock_guard lock(mutex); + + while (!diving_heap.empty()) { + auto entry = diving_heap.pop(); + + if (entry.has_value()) { + if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } + } + } + + return std::nullopt; + } + + i_t diving_queue_size() + { + std::lock_guard lock(mutex); + return diving_heap.size(); + } + + i_t best_first_queue_size() + { + std::lock_guard lock(mutex); + return best_first_heap.size(); + } + + f_t get_lower_bound() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; + } + + mip_node_t* bfs_top() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? nullptr : best_first_heap.top()->node; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 56298ef4d..2bc00f636 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -623,14 +623,17 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& x, std::vector& squared_infeasibilities, - std::vector& infeasibility_indices) + std::vector& infeasibility_indices, + f_t& primal_inf) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - squared_infeasibilities.resize(n, 0.0); + squared_infeasibilities.resize(n); + std::fill(squared_infeasibilities.begin(), squared_infeasibilities.end(), 0.0); infeasibility_indices.reserve(n); infeasibility_indices.clear(); - f_t primal_inf = 0.0; + f_t primal_inf_squared = 0.0; + primal_inf = 0.0; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; const f_t lower_infeas = lp.lower[j] - x[j]; @@ -640,10 +643,11 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const f_t square_infeas = infeas * infeas; squared_infeasibilities[j] = square_infeas; infeasibility_indices.push_back(j); - primal_inf += square_infeas; + primal_inf_squared += square_infeas; + primal_inf += infeas; } } - return primal_inf; + return primal_inf_squared; } template @@ -2241,7 +2245,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > + 0) { return dual::status_t::NUMERICAL; } @@ -2268,7 +2273,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #ifdef COMPUTE_DUAL_RESIDUAL std::vector dual_res1; - compute_dual_residual(lp.A, objective, y, z, dual_res1); + phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.tight_tol) { settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm); @@ -2357,8 +2362,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector bounded_variables(n, 0); phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); - f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + f_t primal_infeasibility; + f_t primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); @@ -2556,9 +2568,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; @@ -2593,9 +2611,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); @@ -2810,7 +2834,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, delta_xB_0_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update primal infeasibilities due to changes in basic variables // from the leaving and entering variables phase2::update_primal_infeasibilities(lp, @@ -2822,7 +2846,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, scaled_delta_xB_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update the entering variable phase2::update_single_primal_infeasibility(lp.lower, lp.upper, @@ -2883,14 +2907,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif if (should_refactor) { bool should_recompute_x = false; - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { should_recompute_x = true; settings.log.printf("Failed to factorize basis. Iteration %d\n", iter); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } i_t count = 0; i_t deficient_size; - while ((deficient_size = - ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus)) > 0) { + while ((deficient_size = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n", iter, static_cast(deficient_size)); @@ -2912,8 +2937,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; } - phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); @@ -2951,7 +2982,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, iter, compute_user_objective(lp, obj), infeasibility_indices.size(), - primal_infeasibility, + primal_infeasibility_squared, sum_perturb, now); } diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index d247fbf67..6c8c3ae4a 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -621,7 +621,7 @@ void convert_user_problem(const user_problem_t& user_problem, } constexpr bool run_bounds_strengthening = false; - if (run_bounds_strengthening) { + if constexpr (run_bounds_strengthening) { csr_matrix_t Arow(1, 1, 1); problem.A.to_compressed_row(Arow); @@ -629,8 +629,8 @@ void convert_user_problem(const user_problem_t& user_problem, // Empty var_types means that all variables are continuous bounds_strengthening_t strengthening(problem, Arow, row_sense, {}); - std::fill(strengthening.bounds_changed.begin(), strengthening.bounds_changed.end(), true); - strengthening.bounds_strengthening(problem.lower, problem.upper, settings); + std::vector bounds_changed(problem.num_cols, true); + strengthening.bounds_strengthening(problem.lower, problem.upper, bounds_changed, settings); } settings.log.debug( diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 80406dcf0..69f15ba18 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -298,7 +298,15 @@ primal::status_t primal_phase2(i_t phase, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 9f84e108d..e30471f9a 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -133,6 +133,75 @@ void strong_branch_helper(i_t start, } } +template +f_t trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + omp_atomic_t& total_lp_iter) +{ + lp_problem_t child_problem = original_lp; + child_problem.lower[branch_var] = branch_var_lower; + child_problem.upper[branch_var] = branch_var_upper; + + simplex_solver_settings_t child_settings = settings; + child_settings.set_log(false); + f_t lp_start_time = tic(); + child_settings.iteration_limit = std::clamp(bnb_lp_iter_per_node, 10, 100); + child_settings.cut_off = upper_bound + settings.dual_tol; + child_settings.inside_mip = 2; + child_settings.scale_columns = false; + + lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); + i_t iter = 0; + std::vector child_vstatus = vstatus; + std::vector child_edge_norms = edge_norms; + std::vector child_basic_list = basic_list; + std::vector child_nonbasic_list = nonbasic_list; + basis_update_mpf_t child_basis_factors = basis_factors; + + dual::status_t status = dual_phase2_with_advanced_basis(2, + 0, + false, + lp_start_time, + child_problem, + child_settings, + child_vstatus, + child_basis_factors, + child_basic_list, + child_nonbasic_list, + solution, + iter, + child_edge_norms); + total_lp_iter += iter; + settings.log.debug("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %s. Obj %e\n", + branch_var, + child_problem.lower[branch_var], + child_problem.upper[branch_var], + iter, + dual::status_to_string(status).c_str(), + compute_objective(child_problem, solution.x)); + + if (status == dual::status_t::DUAL_UNBOUNDED) { + // LP was infeasible + return std::numeric_limits::infinity(); + } else if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || + status == dual::status_t::CUTOFF) { + return compute_objective(child_problem, solution.x); + } else { + return std::numeric_limits::quiet_NaN(); + } +} + } // namespace template @@ -148,8 +217,8 @@ void strong_branching(const lp_problem_t& original_lp, pseudo_costs_t& pc) { pc.resize(original_lp.num_cols); - pc.strong_branch_down.resize(fractional.size()); - pc.strong_branch_up.resize(fractional.size()); + pc.strong_branch_down.assign(fractional.size(), 0); + pc.strong_branch_up.assign(fractional.size(), 0); pc.num_strong_branches_completed = 0; settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", @@ -199,7 +268,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - mutex.lock(); + std::lock_guard lock(pseudo_cost_mutex[node_ptr->branch_var]); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -211,14 +280,13 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } - mutex.unlock(); } template void pseudo_costs_t::initialized(i_t& num_initialized_down, i_t& num_initialized_up, f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const + f_t& pseudo_cost_up_avg) { num_initialized_down = 0; num_initialized_up = 0; @@ -226,6 +294,8 @@ void pseudo_costs_t::initialized(i_t& num_initialized_down, pseudo_cost_up_avg = 0; const i_t n = pseudo_cost_sum_down.size(); for (i_t j = 0; j < n; j++) { + std::lock_guard lock(pseudo_cost_mutex[j]); + if (pseudo_cost_num_down[j] > 0) { num_initialized_down++; if (std::isfinite(pseudo_cost_sum_down[j])) { @@ -258,8 +328,6 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const std::vector& solution, logger_t& log) { - mutex.lock(); - const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); std::vector pseudo_cost_down(num_fractional); @@ -272,14 +340,16 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); + log.debug("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); for (i_t k = 0; k < num_fractional; k++) { const i_t j = fractional[k]; + + pseudo_cost_mutex[j].lock(); if (pseudo_cost_num_down[j] != 0) { pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; } else { @@ -291,6 +361,8 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio } else { pseudo_cost_up[k] = pseudo_cost_up_avg; } + pseudo_cost_mutex[j].unlock(); + constexpr f_t eps = 1e-6; const f_t f_down = solution[j] - std::floor(solution[j]); const f_t f_up = std::ceil(solution[j]) - solution[j]; @@ -309,14 +381,196 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio } } - log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); + log.debug("Pseudocost branching on %d. Value %e. Score %e.\n", + branch_var, + solution[branch_var], + score[select]); + + return branch_var; +} + +template +i_t pseudo_costs_t::reliable_variable_selection( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const std::vector& fractional, + const std::vector& solution, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + f_t current_obj, + f_t upper_bound, + i_t bnb_lp_iter, + i_t bnb_explored_nodes, + logger_t& log) +{ + i_t branch_var = fractional[0]; + f_t max_score = -1; + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; - mutex.unlock(); + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + log.printf("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); + + // const i_t max_iter = 0.5 * bnb_lp_iter; + // const i_t gamma = (max_iter - total_lp_iter) / (total_lp_iter + 1); + // const i_t max_v = 5; + // const i_t min_v = 1; + // i_t reliable_threshold = std::clamp((1 - gamma) * min_v + gamma * max_v, min_v, max_v); + // reliable_threshold = total_lp_iter < max_iter ? reliable_threshold : 0; + i_t reliable_threshold = 1; + + settings.log.debug("RB LP iterations = %d, B&B LP iterations = %d reliable_threshold = %d\n", + total_lp_iter.load(), + bnb_lp_iter, + reliable_threshold); + + std::vector pending = fractional; + std::vector conflict; + conflict.reserve(fractional.size()); + + while (!pending.empty()) { + for (auto j : pending) { + bool is_locked = pseudo_cost_mutex[j].try_lock(); + + if (!is_locked) { + conflict.push_back(j); + continue; + } + + if (pseudo_cost_num_down[j] < reliable_threshold) { + // Do trial branching on the down branch + f_t obj = trial_branching(lp, + settings, + var_types, + vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + j, + lp.lower[j], + std::floor(solution[j]), + upper_bound, + bnb_lp_iter / bnb_explored_nodes, + total_lp_iter); + if (!std::isnan(obj)) { + f_t change_in_obj = obj - current_obj; + f_t change_in_x = solution[j] - std::floor(solution[j]); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + } + } + + if (pseudo_cost_num_up[j] < reliable_threshold) { + f_t obj = trial_branching(lp, + settings, + var_types, + vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + j, + std::ceil(solution[j]), + lp.upper[j], + upper_bound, + bnb_lp_iter / bnb_explored_nodes, + total_lp_iter); + + if (!std::isnan(obj)) { + f_t change_in_obj = obj - current_obj; + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + } + } + + f_t pc_up = pseudo_cost_num_up[j] > 0 ? pseudo_cost_sum_up[j] / pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + f_t pc_down = pseudo_cost_sum_down[j] > 0 ? pseudo_cost_sum_down[j] / pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + pseudo_cost_mutex[j].unlock(); + + constexpr f_t eps = 1e-6; + const f_t f_down = solution[j] - std::floor(solution[j]); + const f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + + if (score > max_score) { + max_score = score; + branch_var = j; + } + } + + std::swap(pending, conflict); + conflict.clear(); + } + + log.printf( + "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); return branch_var; } +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) +{ + const i_t num_fractional = fractional.size(); + f_t estimate = lower_bound; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t k = 0; k < num_fractional; k++) { + const i_t j = fractional[k]; + + f_t pseudo_cost_down = 0; + f_t pseudo_cost_up = 0; + + pseudo_cost_mutex[j].lock(); + if (pseudo_cost_num_down[j] != 0) { + pseudo_cost_down = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } else { + pseudo_cost_down = pseudo_cost_down_avg; + } + + if (pseudo_cost_num_up[j] != 0) { + pseudo_cost_up = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } else { + pseudo_cost_up = pseudo_cost_up_avg; + } + pseudo_cost_mutex[j].unlock(); + + constexpr f_t eps = 1e-6; + const f_t f_down = solution[j] - std::floor(solution[j]); + const f_t f_up = std::ceil(solution[j]) - solution[j]; + estimate += + std::min(std::max(pseudo_cost_down * f_down, eps), std::max(pseudo_cost_up * f_up, eps)); + } + + log.debug("pseudocost estimate = %e\n", estimate); + return estimate; +} + template void pseudo_costs_t::update_pseudo_costs_from_strong_branching( const std::vector& fractional, const std::vector& root_soln) diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 799cdc3ff..8e079d0da 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -1,12 +1,13 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include #include #include #include @@ -24,7 +25,8 @@ class pseudo_costs_t { : pseudo_cost_sum_down(num_variables), pseudo_cost_sum_up(num_variables), pseudo_cost_num_down(num_variables), - pseudo_cost_num_up(num_variables) + pseudo_cost_num_up(num_variables), + pseudo_cost_mutex(num_variables) { } @@ -32,21 +34,43 @@ class pseudo_costs_t { void resize(i_t num_variables) { - pseudo_cost_sum_down.resize(num_variables); - pseudo_cost_sum_up.resize(num_variables); - pseudo_cost_num_down.resize(num_variables); - pseudo_cost_num_up.resize(num_variables); + pseudo_cost_sum_down.assign(num_variables, 0); + pseudo_cost_sum_up.assign(num_variables, 0); + pseudo_cost_num_down.assign(num_variables, 0); + pseudo_cost_num_up.assign(num_variables, 0); + pseudo_cost_mutex.resize(num_variables); } void initialized(i_t& num_initialized_down, i_t& num_initialized_up, f_t& pseudo_cost_down_avg, - f_t& pseudo_cost_up_avg) const; + f_t& pseudo_cost_up_avg); i_t variable_selection(const std::vector& fractional, const std::vector& solution, logger_t& log); + i_t reliable_variable_selection(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const std::vector& fractional, + const std::vector& solution, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + f_t current_obj, + f_t upper_bound, + i_t bnb_lp_iter, + i_t bnb_explored_nodes, + logger_t& log); + + f_t obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log); + void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); std::vector pseudo_cost_sum_up; @@ -55,9 +79,9 @@ class pseudo_costs_t { std::vector pseudo_cost_num_down; std::vector strong_branch_down; std::vector strong_branch_up; - - omp_mutex_t mutex; + std::vector pseudo_cost_mutex; omp_atomic_t num_strong_branches_completed = 0; + omp_atomic_t total_lp_iter = 0; }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index a1cc049e7..c46eda085 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,12 +13,28 @@ #include #include #include +#include #include #include #include namespace cuopt::linear_programming::dual_simplex { +template +struct diving_heuristics_settings_t { + i_t num_diving_workers = -1; + + bool disable_line_search_diving = false; + bool disable_pseudocost_diving = false; + bool disable_guided_diving = false; + bool disable_coefficient_diving = false; + + i_t min_node_depth = 10; + i_t node_limit = 500; + f_t iteration_limit_factor = 0.05; + i_t backtrack = 5; +}; + template struct simplex_solver_settings_t { public: @@ -70,14 +86,14 @@ struct simplex_solver_settings_t { iteration_log_frequency(1000), first_iteration_log(2), num_threads(omp_get_max_threads() - 1), - num_bfs_threads(std::min(num_threads / 4, 1)), - num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), + num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), inside_mip(0), solution_callback(nullptr), heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) { + diving_settings.num_diving_workers = std::max(num_threads - num_bfs_workers, 1); } void set_log(bool logging) const { log.log = logging; } @@ -137,8 +153,10 @@ struct simplex_solver_settings_t { i_t first_iteration_log; // number of iterations to log at beginning of solve i_t num_threads; // number of threads to use i_t random_seed; // random seed - i_t num_bfs_threads; // number of threads dedicated to the best-first search - i_t num_diving_threads; // number of threads dedicated to diving + i_t num_bfs_workers; // number of threads dedicated to the best-first search + + diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index ba648f30e..7394e2db6 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -256,13 +256,19 @@ void rins_t::run_rins() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = std::min(current_mip_gap, (f_t)settings.target_mip_gap); - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; - branch_and_bound_settings.solution_callback = [this, &rins_solution_queue]( + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 2; + branch_and_bound_settings.num_bfs_workers = 1; + + // In the future, let RINS use all the diving heuristics. For now, + // restricting to guided diving. + branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.disable_line_search_diving = true; + branch_and_bound_settings.diving_settings.disable_coefficient_diving = true; + branch_and_bound_settings.diving_settings.disable_pseudocost_diving = true; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.solution_callback = [this, &rins_solution_queue]( std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); }; diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 5be807372..d46c6b31a 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -103,9 +103,15 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.num_bfs_workers = 1; + + // In the future, let SubMIP use all the diving heuristics. For now, + // restricting to guided diving. + branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.disable_line_search_diving = true; + branch_and_bound_settings.diving_settings.disable_coefficient_diving = true; + branch_and_bound_settings.diving_settings.disable_pseudocost_diving = true; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 7311a26fd..08e1806b9 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -173,13 +173,12 @@ solution_t mip_solver_t::run_solver() } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } - CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); - i_t num_threads = branch_and_bound_settings.num_threads; - i_t num_bfs_threads = std::max(1, num_threads / 4); - i_t num_diving_threads = std::max(1, num_threads - num_bfs_threads); - branch_and_bound_settings.num_bfs_threads = num_bfs_threads; - branch_and_bound_settings.num_diving_threads = num_diving_threads; + i_t num_threads = branch_and_bound_settings.num_threads; + i_t num_bfs_workers = std::max(1, num_threads / 4); + i_t num_diving_workers = std::max(1, num_threads - num_bfs_workers); + branch_and_bound_settings.num_bfs_workers = num_bfs_workers; + branch_and_bound_settings.diving_settings.num_diving_workers = num_diving_workers; // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 33eda66cb..e1b68bf88 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -53,7 +53,7 @@ class omp_atomic_t { T operator--() { return fetch_sub(T(1)) - 1; } T operator--(int) { return fetch_sub(T(1)); } - T load() + T load() const { T res; #pragma omp atomic read