diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index e8a9b5dce..af1415fa9 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -31,6 +31,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pinned_host_allocator.cu + ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) # Uncomment to enable debug info diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index db24f55a2..f5cd54053 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -613,6 +613,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, @@ -658,7 +660,15 @@ i_t basis_repair(const csc_matrix_t& A, nonbasic_list[nonbasic_map[replace_j]] = bad_j; vstatus[replace_j] = variable_status_t::BASIC; // This is the main issue. What value should bad_j take on. - vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + if (lower[bad_j] == -inf && upper[bad_j] == inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + } else if (lower[bad_j] > -inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_LOWER; + } else if (upper[bad_j] < inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_UPPER; + } else { + assert(1 == 0); + } } return 0; @@ -849,6 +859,8 @@ template int factorize_basis(const csc_matrix_t& A, template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index b668c0f46..295bedccd 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -42,6 +42,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 6b79f3c86..2c781a515 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -2046,6 +2046,8 @@ template int basis_update_mpf_t::refactor_basis( const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus) @@ -2066,7 +2068,8 @@ int basis_update_mpf_t::refactor_basis( deficient, slacks_needed) == -1) { settings.log.debug("Initial factorization failed\n"); - basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair( + A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); #ifdef CHECK_BASIS_REPAIR const i_t m = A.m; diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index cea907074..afd4f4c9a 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -373,6 +373,8 @@ class basis_update_mpf_t { // Compute L*U = A(p, basic_list) int refactor_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus); diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp index f1bf52c1e..4114e7e09 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.cpp +++ b/cpp/src/dual_simplex/bounds_strengthening.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -154,7 +154,7 @@ bool bounds_strengthening_t::bounds_strengthening( bool is_infeasible = check_infeasibility(min_a, max_a, cnst_lb, cnst_ub, settings.primal_tol); if (is_infeasible) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible constraint %d, cnst_lb %e, cnst_ub %e, min_a %e, max_a %e\n", iter, i, @@ -211,7 +211,7 @@ bool bounds_strengthening_t::bounds_strengthening( new_ub = std::min(new_ub, upper_bounds[k]); if (new_lb > new_ub + 1e-6) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); return false; } diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 81db8a341..b2c9f85d2 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -5,10 +5,9 @@ */ /* clang-format on */ -#include -#include -#include #include + +#include #include #include #include @@ -20,6 +19,9 @@ #include #include +#include + +#include #include #include #include @@ -192,20 +194,31 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } } -inline const char* feasible_solution_symbol(thread_type_t type) +#ifdef SHOW_DIVING_TYPE +inline char feasible_solution_symbol(bnb_worker_type_t type) { switch (type) { - case thread_type_t::EXPLORATION: return "B "; - case thread_type_t::DIVING: return "D "; - default: return "U "; + case bnb_worker_type_t::BEST_FIRST: return 'B'; + case bnb_worker_type_t::COEFFICIENT_DIVING: return 'C'; + case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'L'; + case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'P'; + case bnb_worker_type_t::GUIDED_DIVING: return 'G'; + default: return 'U'; } } - -inline bool has_children(node_solve_info_t status) +#else +inline char feasible_solution_symbol(bnb_worker_type_t type) { - return status == node_solve_info_t::UP_CHILD_FIRST || - status == node_solve_info_t::DOWN_CHILD_FIRST; + switch (type) { + case bnb_worker_type_t::BEST_FIRST: return 'B'; + case bnb_worker_type_t::COEFFICIENT_DIVING: return 'D'; + case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'D'; + case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'D'; + case bnb_worker_type_t::GUIDED_DIVING: return 'D'; + default: return 'U'; + } } +#endif } // namespace @@ -216,59 +229,39 @@ branch_and_bound_t::branch_and_bound_t( : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), + Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_exploration_status_t::UNSET) + solver_status_(mip_status_t::UNSET) { exploration_stats_.start_time = tic(); dualize_info_t dualize_info; convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); - mutex_upper_.lock(); upper_bound_ = inf; - mutex_upper_.unlock(); -} - -template -f_t branch_and_bound_t::get_upper_bound() -{ - mutex_upper_.lock(); - const f_t upper_bound = upper_bound_; - mutex_upper_.unlock(); - return upper_bound; } template f_t branch_and_bound_t::get_lower_bound() { - f_t lower_bound = lower_bound_ceiling_.load(); - mutex_heap_.lock(); - if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } - mutex_heap_.unlock(); + f_t lower_bound = lower_bound_ceiling_.load(); + f_t heap_lower_bound = node_queue_.get_lower_bound(); + lower_bound = std::min(heap_lower_bound, lower_bound); for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); } - return lower_bound; -} - -template -i_t branch_and_bound_t::get_heap_size() -{ - mutex_heap_.lock(); - i_t size = heap_.size(); - mutex_heap_.unlock(); - return size; + return std::isfinite(lower_bound) ? lower_bound : -inf; } template void branch_and_bound_t::report_heuristic(f_t obj) { - if (solver_status_ == mip_exploration_status_t::RUNNING) { + if (is_running) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(user_obj, user_lower); @@ -287,10 +280,7 @@ void branch_and_bound_t::report_heuristic(f_t obj) } template -void branch_and_bound_t::report(std::string symbol, - f_t obj, - f_t lower_bound, - i_t node_depth) +void branch_and_bound_t::report(char symbol, f_t obj, f_t lower_bound, i_t node_depth) { i_t nodes_explored = exploration_stats_.nodes_explored; i_t nodes_unexplored = exploration_stats_.nodes_unexplored; @@ -298,8 +288,8 @@ void branch_and_bound_t::report(std::string symbol, f_t user_lower = compute_user_objective(original_lp_, lower_bound); f_t iter_node = exploration_stats_.total_lp_iters / nodes_explored; std::string user_gap = user_mip_gap(user_obj, user_lower); - settings_.log.printf("%s%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", - symbol.c_str(), + settings_.log.printf("%c %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + symbol, nodes_explored, nodes_unexplored, user_obj, @@ -460,30 +450,24 @@ void branch_and_bound_t::repair_heuristic_solutions() } template -mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t& solution, - f_t lower_bound) +void branch_and_bound_t::set_final_solution(mip_solution_t& solution, + f_t lower_bound) { - mip_status_t mip_status = mip_status_t::UNSET; - - if (solver_status_ == mip_exploration_status_t::NUMERICAL) { + if (solver_status_ == mip_status_t::NUMERICAL) { settings_.log.printf("Numerical issue encountered. Stopping the solver...\n"); - mip_status = mip_status_t::NUMERICAL; } - if (solver_status_ == mip_exploration_status_t::TIME_LIMIT) { + if (solver_status_ == mip_status_t::TIME_LIMIT) { settings_.log.printf("Time limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::TIME_LIMIT; } - if (solver_status_ == mip_exploration_status_t::NODE_LIMIT) { + if (solver_status_ == mip_status_t::NODE_LIMIT) { settings_.log.printf("Node limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::NODE_LIMIT; } - f_t upper_bound = get_upper_bound(); - f_t gap = upper_bound - lower_bound; - f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t gap = upper_bound_ - lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); - f_t gap_rel = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; settings_.log.printf("Explored %d nodes in %.2fs.\n", @@ -496,7 +480,7 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t 0 && gap <= settings_.absolute_mip_gap_tol) { settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", settings_.absolute_mip_gap_tol); @@ -511,18 +495,18 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t 0 && exploration_stats_.nodes_unexplored == 0 && - upper_bound == inf) { + upper_bound_ == inf) { settings_.log.printf("Integer infeasible.\n"); - mip_status = mip_status_t::INFEASIBLE; + solver_status_ = mip_status_t::INFEASIBLE; if (settings_.heuristic_preemption_callback != nullptr) { settings_.heuristic_preemption_callback(); } } } - if (upper_bound != inf) { + if (upper_bound_ != inf) { assert(incumbent_.has_incumbent); uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); } @@ -530,18 +514,20 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type) + bnb_worker_type_t thread_type) { bool send_solution = false; + settings_.log.debug("%c found a feasible solution with obj=%.10e.\n", + feasible_solution_symbol(thread_type), + compute_user_objective(original_lp_, leaf_objective)); + mutex_upper_.lock(); if (leaf_objective < upper_bound_) { incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); @@ -558,16 +544,18 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, mutex_upper_.unlock(); } -template -rounding_direction_t branch_and_bound_t::child_selection(mip_node_t* node_ptr) +// Martin's criteria for the preferred rounding direction (see [1]) +// [1] A. Martin, “Integer Programs with Block Structure,” +// Technische Universit¨at Berlin, Berlin, 1999. Accessed: Aug. 08, 2025. +// [Online]. Available: https://opus4.kobv.de/opus4-zib/frontdoor/index/index/docId/391 +template +rounding_direction_t martin_criteria(f_t val, f_t root_val) { - const i_t branch_var = node_ptr->get_down_child()->branch_var; - const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; - const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); - const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); - const f_t down_dist = branch_var_val - down_val; - const f_t up_dist = up_val - branch_var_val; - constexpr f_t eps = 1e-6; + const f_t down_val = std::floor(root_val); + const f_t up_val = std::ceil(root_val); + const f_t down_dist = val - down_val; + const f_t up_dist = up_val - val; + constexpr f_t eps = 1e-6; if (down_dist < up_dist + eps) { return rounding_direction_t::DOWN; @@ -578,34 +566,85 @@ rounding_direction_t branch_and_bound_t::child_selection(mip_node_t -node_solve_info_t branch_and_bound_t::solve_node( +branch_variable_t branch_and_bound_t::variable_selection( + mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + bnb_worker_type_t type) +{ + logger_t log; + log.log = false; + i_t branch_var = -1; + rounding_direction_t round_dir = rounding_direction_t::NONE; + std::vector current_incumbent; + + // If there is no incumbent, use pseudocost diving instead of guided diving + if (upper_bound_ == inf && type == bnb_worker_type_t::GUIDED_DIVING) { + type = bnb_worker_type_t::PSEUDOCOST_DIVING; + } + + switch (type) { + case bnb_worker_type_t::BEST_FIRST: + branch_var = pc_.variable_selection(fractional, solution, log); + round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); + return {branch_var, round_dir}; + + case bnb_worker_type_t::COEFFICIENT_DIVING: + return coefficient_diving( + original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); + + case bnb_worker_type_t::LINE_SEARCH_DIVING: + return line_search_diving(fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::PSEUDOCOST_DIVING: + return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::GUIDED_DIVING: + mutex_upper_.lock(); + current_incumbent = incumbent_.x; + mutex_upper_.unlock(); + return guided_diving(pc_, fractional, solution, current_incumbent, log); + + default: + log.debug("Unknown variable selection method: %d\n", type); + return {-1, rounding_direction_t::NONE}; + } +} + +template +dual::status_t branch_and_bound_t::solve_node_lp( mip_node_t* node_ptr, - search_tree_t& search_tree, lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, basis_update_mpf_t& basis_factors, std::vector& basic_list, std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, - thread_type_t thread_type, + bnb_worker_type_t thread_type, bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log) { - const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; - const f_t upper_bound = get_upper_bound(); - - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); std::vector& leaf_vstatus = node_ptr->vstatus; assert(leaf_vstatus.size() == leaf_problem.num_cols); simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings_.dual_tol; + lp_settings.cut_off = upper_bound_ + settings_.dual_tol; lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; + if (thread_type != bnb_worker_type_t::BEST_FIRST) { + i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + f_t factor = settings_.diving_settings.iteration_limit_factor; + i_t max_iter = factor * bnb_lp_iters; + lp_settings.iteration_limit = max_iter - stats.total_lp_iters; + if (lp_settings.iteration_limit <= 0) { return dual::status_t::ITERATION_LIMIT; } + } + #ifdef LOG_NODE_SIMPLEX lp_settings.set_log(true); std::stringstream ss; @@ -682,30 +721,44 @@ node_solve_info_t branch_and_bound_t::solve_node( lp_status = convert_lp_status_to_dual_status(second_status); } - if (thread_type == thread_type_t::EXPLORATION) { - exploration_stats_.total_lp_solve_time += toc(lp_start_time); - exploration_stats_.total_lp_iters += node_iter; - } + stats.total_lp_solve_time += toc(lp_start_time); + stats.total_lp_iters += node_iter; } #ifdef LOG_NODE_SIMPLEX lp_settings.log.printf("\nLP status: %d\n\n", lp_status); #endif + return lp_status; +} + +template +std::pair branch_and_bound_t::update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + bnb_worker_type_t thread_type, + dual::status_t lp_status, + logger_t& log) +{ + const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + std::vector& leaf_vstatus = node_ptr->vstatus; + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch node_ptr->lower_bound = inf; search_tree.graphviz_node(log, node_ptr, "infeasible", 0.0); search_tree.update(node_ptr, node_status_t::INFEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INFEASIBLE, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::CUTOFF) { // Node was cut off. Do not branch - node_ptr->lower_bound = upper_bound; + node_ptr->lower_bound = upper_bound_; f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); search_tree.graphviz_node(log, node_ptr, "cut off", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::OPTIMAL) { // LP was feasible @@ -718,10 +771,12 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); pc_.update_pseudo_costs(node_ptr, leaf_objective); - if (settings_.node_processed_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); - settings_.node_processed_callback(original_x, leaf_objective); + if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (settings_.node_processed_callback != nullptr) { + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); + settings_.node_processed_callback(original_x, leaf_objective); + } } if (leaf_num_fractional == 0) { @@ -729,37 +784,39 @@ node_solve_info_t branch_and_bound_t::solve_node( add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); search_tree.update(node_ptr, node_status_t::INTEGER_FEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INTEGER_FEASIBLE, rounding_direction_t::NONE}; - } else if (leaf_objective <= upper_bound + abs_fathom_tol) { + } else if (leaf_objective <= upper_bound_ + abs_fathom_tol) { // Choose fractional variable to branch on - const i_t branch_var = - pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); + auto [branch_var, round_dir] = + variable_selection(node_ptr, leaf_fractional, leaf_solution.x, thread_type); assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(branch_var >= 0); + assert(round_dir != rounding_direction_t::NONE); + + // Note that the exploration thread is the only one that can insert new nodes into the heap, + // and thus, we only need to calculate the objective estimate here (it is used for + // sorting the nodes for diving). + if (thread_type == bnb_worker_type_t::BEST_FIRST) { + logger_t pc_log; + pc_log.log = false; + node_ptr->objective_estimate = + pc_.obj_estimate(leaf_fractional, leaf_solution.x, node_ptr->lower_bound, pc_log); + } + search_tree.branch( node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, leaf_problem, log); search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); - - rounding_direction_t round_dir = child_selection(node_ptr); - - if (round_dir == rounding_direction_t::UP) { - return node_solve_info_t::UP_CHILD_FIRST; - } else { - return node_solve_info_t::DOWN_CHILD_FIRST; - } + return {node_status_t::HAS_CHILDREN, round_dir}; } else { search_tree.graphviz_node(log, node_ptr, "fathomed", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } - } else if (lp_status == dual::status_t::TIME_LIMIT) { - search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); - return node_solve_info_t::TIME_LIMIT; - } else { - if (thread_type == thread_type_t::EXPLORATION) { + if (thread_type == bnb_worker_type_t::BEST_FIRST) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " @@ -772,17 +829,15 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "numerical", 0.0); search_tree.update(node_ptr, node_status_t::NUMERICAL); - return node_solve_info_t::NUMERICAL; + return {node_status_t::NUMERICAL, rounding_direction_t::NONE}; } } template void branch_and_bound_t::exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, i_t initial_heap_size) { - if (solver_status_ != mip_exploration_status_t::RUNNING) { return; } + if (solver_status_ != mip_status_t::UNSET) { return; } // Note that we do not know which thread will execute the // `exploration_ramp_up` task, so we allow to any thread @@ -790,13 +845,13 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod repair_heuristic_solutions(); f_t lower_bound = node->lower_bound; - f_t upper_bound = get_upper_bound(); + f_t upper_bound = upper_bound_; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); f_t abs_gap = upper_bound - lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); - search_tree->update(node, node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node, "cutoff", node->lower_bound); + search_tree_.update(node, node_status_t::FATHOMED); --exploration_stats_.nodes_unexplored; return; } @@ -812,7 +867,7 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod bool should_report = should_report_.exchange(false); if (should_report) { - report(" ", upper_bound, root_objective_, node->depth); + report(' ', upper_bound, root_objective_, node->depth); exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); should_report_ = true; @@ -820,84 +875,91 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod } if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; + solver_status_ = mip_status_t::TIME_LIMIT; return; } // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - node_solve_info_t status = solve_node(node, - *search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - true, - original_lp_.lower, - original_lp_.upper, - settings_.log); + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + dual::status_t lp_status = solve_node_lp(node, + leaf_problem, + leaf_solution, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + bnb_worker_type_t::BEST_FIRST, + true, + original_lp_.lower, + original_lp_.upper, + exploration_stats_, + settings_.log); + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + return; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + auto [node_status, round_dir] = update_tree(node, + search_tree_, + leaf_problem, + leaf_solution, + bnb_worker_type_t::BEST_FIRST, + lp_status, + settings_.log); - } else if (has_children(status)) { + if (node_status == node_status_t::HAS_CHILDREN) { exploration_stats_.nodes_unexplored += 2; // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase if (exploration_stats_.nodes_unexplored < initial_heap_size) { #pragma omp task - exploration_ramp_up(node->get_down_child(), search_tree, Arow, initial_heap_size); + exploration_ramp_up(node->get_down_child(), initial_heap_size); #pragma omp task - exploration_ramp_up(node->get_up_child(), search_tree, Arow, initial_heap_size); + exploration_ramp_up(node->get_up_child(), initial_heap_size); } else { // We've generated enough nodes, push further nodes onto the heap - mutex_heap_.lock(); - heap_.push(node->get_down_child()); - heap_.push(node->get_up_child()); - mutex_heap_.unlock(); + node_queue_.push(node->get_down_child()); + node_queue_.push(node->get_up_child()); } } } template -void branch_and_bound_t::explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) +void branch_and_bound_t::plunge_from(i_t task_id, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list) { bool recompute_bounds_and_basis = true; std::deque*> stack; stack.push_front(start_node); - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { if (task_id == 0) { repair_heuristic_solutions(); } mip_node_t* node_ptr = stack.front(); stack.pop_front(); f_t lower_bound = node_ptr->lower_bound; - f_t upper_bound = get_upper_bound(); + f_t upper_bound = upper_bound_; f_t abs_gap = upper_bound - lower_bound; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); @@ -911,8 +973,9 @@ void branch_and_bound_t::explore_subtree(i_t task_id, local_lower_bounds_[task_id] = lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); - search_tree.update(node_ptr, node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree_.update(node_ptr, node_status_t::FATHOMED); + recompute_bounds_and_basis = true; --exploration_stats_.nodes_unexplored; continue; } @@ -927,45 +990,58 @@ void branch_and_bound_t::explore_subtree(i_t task_id, abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - report(" ", upper_bound, get_lower_bound(), node_ptr->depth); + report(' ', upper_bound, get_lower_bound(), node_ptr->depth); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; } } if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + solver_status_ = mip_status_t::TIME_LIMIT; + break; } if (exploration_stats_.nodes_explored >= settings_.node_limit) { - solver_status_ = mip_exploration_status_t::NODE_LIMIT; - return; + solver_status_ = mip_status_t::NODE_LIMIT; + break; } - node_solve_info_t status = solve_node(node_ptr, - search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - recompute_bounds_and_basis, - original_lp_.lower, - original_lp_.upper, - settings_.log); - - recompute_bounds_and_basis = !has_children(status); + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + dual::status_t lp_status = solve_node_lp(node_ptr, + leaf_problem, + leaf_solution, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + bnb_worker_type_t::BEST_FIRST, + recompute_bounds_and_basis, + original_lp_.lower, + original_lp_.upper, + exploration_stats_, + settings_.log); + + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + auto [node_status, round_dir] = update_tree(node_ptr, + search_tree_, + leaf_problem, + leaf_solution, + bnb_worker_type_t::BEST_FIRST, + lp_status, + settings_.log); + + recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; - } else if (has_children(status)) { + if (node_status == node_status_t::HAS_CHILDREN) { // The stack should only contain the children of the current parent. // If the stack size is greater than 0, // we pop the current node from the stack and place it in the global heap, @@ -973,33 +1049,12 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); - - // The order here matters. We want to create a copy of the node - // before adding to the global heap. Otherwise, - // some thread may consume the node (possibly fathoming it) - // before we had the chance to add to the diving queue. - // This lead to a SIGSEGV. Although, in this case, it - // would be better if we discard the node instead. - if (get_heap_size() > settings_.num_bfs_threads) { - std::vector lower = original_lp_.lower; - std::vector upper = original_lp_.upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - mutex_dive_queue_.lock(); - diving_queue_.emplace(node->detach_copy(), std::move(lower), std::move(upper)); - mutex_dive_queue_.unlock(); - } - - mutex_heap_.lock(); - heap_.push(node); - mutex_heap_.unlock(); + node_queue_.push(node); } exploration_stats_.nodes_unexplored += 2; - if (status == node_solve_info_t::UP_CHILD_FIRST) { + if (round_dir == rounding_direction_t::UP) { stack.push_front(node_ptr->get_down_child()); stack.push_front(node_ptr->get_up_child()); } else { @@ -1011,177 +1066,220 @@ void branch_and_bound_t::explore_subtree(i_t task_id, } template -void branch_and_bound_t::best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow) +void branch_and_bound_t::best_first_thread(i_t task_id) { f_t lower_bound = -inf; - f_t upper_bound = inf; f_t abs_gap = inf; f_t rel_gap = inf; // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - while (solver_status_ == mip_exploration_status_t::RUNNING && - abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - mip_node_t* start_node = nullptr; - + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { + // In the current implementation, we are use the active number of subtree to decide + // when to stop the execution. We need to increment the counter at the same + // time as we pop a node from the queue to avoid some threads exiting + // the main loop thinking that the solver has already finished. + // This will be not needed in the master-worker model. + node_queue_.lock(); // If there any node left in the heap, we pop the top node and explore it. - mutex_heap_.lock(); - if (heap_.size() > 0) { - start_node = heap_.top(); - heap_.pop(); - active_subtrees_++; - } - mutex_heap_.unlock(); + std::optional*> start_node = node_queue_.pop_best_first(); + if (start_node.has_value()) { active_subtrees_++; }; + node_queue_.unlock(); - if (start_node != nullptr) { - if (get_upper_bound() < start_node->lower_bound) { + if (start_node.has_value()) { + if (upper_bound_ < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); - search_tree.update(start_node, node_status_t::FATHOMED); + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); active_subtrees_--; continue; } // Best-first search with plunging - explore_subtree(task_id, - start_node, - search_tree, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); + plunge_from(task_id, + start_node.value(), + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list); active_subtrees_--; } lower_bound = get_lower_bound(); - upper_bound = get_upper_bound(); - abs_gap = upper_bound - lower_bound; - rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } + is_running = false; + // Check if it is the last thread that exited the loop and no // timeout or numerical error has happen. - if (solver_status_ == mip_exploration_status_t::RUNNING) { - if (active_subtrees_ == 0) { - solver_status_ = mip_exploration_status_t::COMPLETED; - } else { - local_lower_bounds_[task_id] = inf; - } + if (solver_status_ == mip_status_t::UNSET) { + if (active_subtrees_ > 0) { local_lower_bounds_[task_id] = inf; } } } template -void branch_and_bound_t::diving_thread(const csr_matrix_t& Arow) +void branch_and_bound_t::dive_from(mip_node_t& start_node, + const std::vector& start_lower, + const std::vector& start_upper, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list, + bnb_worker_type_t diving_type) { logger_t log; log.log = false; + + const i_t diving_node_limit = settings_.diving_settings.node_limit; + const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; + bool recompute_bounds_and_basis = true; + search_tree_t dive_tree(std::move(start_node)); + std::deque*> stack; + stack.push_front(&dive_tree.root); + + bnb_stats_t dive_stats; + dive_stats.total_lp_iters = 0; + dive_stats.total_lp_solve_time = 0; + dive_stats.nodes_explored = 0; + dive_stats.nodes_unexplored = 1; + + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + + f_t lower_bound = node_ptr->lower_bound; + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + + if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + recompute_bounds_and_basis = true; + continue; + } + + if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } + if (dive_stats.nodes_explored > diving_node_limit) { break; } + + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + dual::status_t lp_status = solve_node_lp(node_ptr, + leaf_problem, + leaf_solution, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + diving_type, + recompute_bounds_and_basis, + start_lower, + start_upper, + dive_stats, + log); + + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } + + ++dive_stats.nodes_explored; + + auto [node_status, round_dir] = + update_tree(node_ptr, dive_tree, leaf_problem, leaf_solution, diving_type, lp_status, log); + recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + + if (node_status == node_status_t::HAS_CHILDREN) { + if (round_dir == rounding_direction_t::UP) { + stack.push_front(node_ptr->get_down_child()); + stack.push_front(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + stack.push_front(node_ptr->get_down_child()); + } + } + + // Remove nodes that we no longer can backtrack to (i.e., from the current node, we can only + // backtrack to a node that is has a depth of at most 5 levels lower than the current node). + if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > diving_backtrack_limit) { + stack.pop_back(); + } + } +} + +template +void branch_and_bound_t::diving_thread(bnb_worker_type_t diving_type) +{ // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - while (solver_status_ == mip_exploration_status_t::RUNNING && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - std::optional> start_node; + std::vector start_lower; + std::vector start_upper; + bool reset_starting_bounds = true; + + while (solver_status_ == mip_status_t::UNSET && is_running && + (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { + if (reset_starting_bounds) { + start_lower = original_lp_.lower; + start_upper = original_lp_.upper; + std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); + reset_starting_bounds = false; + } - mutex_dive_queue_.lock(); - if (diving_queue_.size() > 0) { start_node = diving_queue_.pop(); } - mutex_dive_queue_.unlock(); + // In the current implementation, multiple threads can pop the nodes + // from the queue, so we need to initialize the lower and upper bound here + // to avoid other thread fathoming the node (i.e., deleting) before we can read + // the variable bounds from the tree. + // This will be not needed in the master-worker model. + node_queue_.lock(); + std::optional*> node_ptr = node_queue_.pop_diving(); + std::optional> start_node = std::nullopt; + + if (node_ptr.has_value()) { + node_ptr.value()->get_variable_bounds( + start_lower, start_upper, node_presolver.bounds_changed); + start_node = node_ptr.value()->detach_copy(); + } + node_queue_.unlock(); if (start_node.has_value()) { - if (get_upper_bound() < start_node->node.lower_bound) { continue; } - - bool recompute_bounds_and_basis = true; - i_t nodes_explored = 0; - search_tree_t subtree(std::move(start_node->node)); - std::deque*> stack; - stack.push_front(&subtree.root); - - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { - mip_node_t* node_ptr = stack.front(); - stack.pop_front(); - f_t upper_bound = get_upper_bound(); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); - - if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds_and_basis = true; - continue; - } - - if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; } - - if (nodes_explored >= 1000) { break; } - - node_solve_info_t status = solve_node(node_ptr, - subtree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::DIVING, - recompute_bounds_and_basis, - start_node->lower, - start_node->upper, - log); - - nodes_explored++; - - recompute_bounds_and_basis = !has_children(status); - - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; - - } else if (has_children(status)) { - if (status == node_solve_info_t::UP_CHILD_FIRST) { - stack.push_front(node_ptr->get_down_child()); - stack.push_front(node_ptr->get_up_child()); - } else { - stack.push_front(node_ptr->get_up_child()); - stack.push_front(node_ptr->get_down_child()); - } - } - - if (stack.size() > 1) { - // If the diving thread is consuming the nodes faster than the - // best first search, then we split the current subtree at the - // lowest possible point and move to the queue, so it can - // be picked by another thread. - if (std::lock_guard lock(mutex_dive_queue_); - diving_queue_.size() < min_diving_queue_size_) { - mip_node_t* new_node = stack.back(); - stack.pop_back(); - - std::vector lower = start_node->lower; - std::vector upper = start_node->upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - new_node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - diving_queue_.emplace(new_node->detach_copy(), std::move(lower), std::move(upper)); - } - } - } + reset_starting_bounds = true; + + if (upper_bound_ < start_node->lower_bound) { continue; } + bool is_feasible = node_presolver.bounds_strengthening(start_lower, start_upper, settings_); + if (!is_feasible) { continue; } + + dive_from(start_node.value(), + start_lower, + start_upper, + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list, + diving_type); } } } @@ -1267,9 +1365,35 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut logger_t log; log.log = false; log.log_prefix = settings_.log.log_prefix; - solver_status_ = mip_exploration_status_t::UNSET; + solver_status_ = mip_status_t::UNSET; + is_running = false; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; + original_lp_.A.to_compressed_row(Arow_); + + std::vector diving_strategies; + diving_strategies.reserve(4); + + if (settings_.diving_settings.pseudocost_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING); + } + + if (settings_.diving_settings.line_search_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING); + } + + if (settings_.diving_settings.guided_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING); + } + + if (settings_.diving_settings.coefficient_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING); + calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); + } + + if (diving_strategies.empty()) { + settings_.log.printf("Warning: All diving heuristics are disabled!\n"); + } if (guess_.size() != 0) { std::vector crushed_guess; @@ -1331,15 +1455,16 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (root_status == lp_status_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, -inf); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, -inf); + return solver_status_; } assert(root_vstatus_.size() == original_lp_.num_cols); set_uninitialized_steepest_edge_norms(edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -1397,8 +1522,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut pc_); if (toc(exploration_stats_.start_time) > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, root_objective_); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; } // Choose variable to branch on @@ -1414,61 +1540,62 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_, log); - csr_matrix_t Arow(1, 1, 0); - original_lp_.A.to_compressed_row(Arow); - settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n", settings_.num_threads, - settings_.num_bfs_threads, - settings_.num_diving_threads); - - settings_.log.printf( - " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " - "| Time |\n"); + settings_.num_bfs_workers, + settings_.num_threads - settings_.num_bfs_workers); - exploration_stats_.nodes_explored = 1; + exploration_stats_.nodes_explored = 0; exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); active_subtrees_ = 0; - min_diving_queue_size_ = 4 * settings_.num_diving_threads; - solver_status_ = mip_exploration_status_t::RUNNING; + is_running = true; lower_bound_ceiling_ = inf; should_report_ = true; + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " + "| Time |\n"); + #pragma omp parallel num_threads(settings_.num_threads) { #pragma omp master { - auto down_child = search_tree_.root.get_down_child(); - auto up_child = search_tree_.root.get_up_child(); - i_t initial_size = 2 * settings_.num_threads; + auto down_child = search_tree_.root.get_down_child(); + auto up_child = search_tree_.root.get_up_child(); + i_t initial_size = 2 * settings_.num_threads; + const i_t num_strategies = diving_strategies.size(); +#pragma omp taskgroup + { #pragma omp task - exploration_ramp_up(down_child, &search_tree_, Arow, initial_size); + exploration_ramp_up(down_child, initial_size); #pragma omp task - exploration_ramp_up(up_child, &search_tree_, Arow, initial_size); - } - -#pragma omp barrier + exploration_ramp_up(up_child, initial_size); + } -#pragma omp master - { - for (i_t i = 0; i < settings_.num_bfs_threads; i++) { + for (i_t i = 0; i < settings_.num_bfs_workers; i++) { #pragma omp task - best_first_thread(i, search_tree_, Arow); + best_first_thread(i); } - for (i_t i = 0; i < settings_.num_diving_threads; i++) { + if (!diving_strategies.empty()) { + for (i_t k = 0; k < settings_.diving_settings.num_diving_workers; k++) { + const bnb_worker_type_t diving_type = diving_strategies[k % num_strategies]; #pragma omp task - diving_thread(Arow); + diving_thread(diving_type); + } } } } - f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree_.root.lower_bound; - return set_final_solution(solution, lower_bound); + is_running = false; + f_t lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() + : search_tree_.root.lower_bound; + set_final_solution(solution, lower_bound); + return solver_status_; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index c71265729..dac1ab393 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -7,9 +7,10 @@ #pragma once -#include +#include #include #include +#include #include #include #include @@ -20,7 +21,6 @@ #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { @@ -35,31 +35,17 @@ enum class mip_status_t { UNSET = 6, // The status is not set }; -enum class mip_exploration_status_t { - UNSET = 0, // The status is not set - TIME_LIMIT = 1, // The solver reached a time limit - NODE_LIMIT = 2, // The maximum number of nodes was reached (not implemented) - NUMERICAL = 3, // The solver encountered a numerical error - RUNNING = 4, // The solver is currently exploring the tree - COMPLETED = 5, // The solver finished exploring the tree -}; - -enum class node_solve_info_t { - NO_CHILDREN = 0, // The node does not produced children - UP_CHILD_FIRST = 1, // The up child should be explored first - DOWN_CHILD_FIRST = 2, // The down child should be explored first - TIME_LIMIT = 3, // The solver reached a time limit - ITERATION_LIMIT = 4, // The solver reached a iteration limit - NUMERICAL = 5 // The solver encounter a numerical error when solving the node -}; - -// Indicate the search and variable selection algorithms used by the thread (See [1]). +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). // // [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, // Berlin, 2007. doi: 10.14279/depositonce-1634. -enum class thread_type_t { - EXPLORATION = 0, // Best-First + Plunging. Pseudocost branching + Martin's criteria. - DIVING = 1, +enum class bnb_worker_type_t { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). If no incumbent is found yet, use pseudocost diving. + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) }; template @@ -68,12 +54,22 @@ class bounds_strengthening_t; template void upper_bound_callback(f_t upper_bound); +template +struct bnb_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + + // This should only be used by the main thread + omp_atomic_t last_log = 0.0; + omp_atomic_t nodes_since_last_log = 0; +}; + template class branch_and_bound_t { public: - template - using mip_node_heap_t = std::priority_queue, node_compare_t>; - branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings); @@ -109,9 +105,7 @@ class branch_and_bound_t { f_t& repaired_obj, std::vector& repaired_solution) const; - f_t get_upper_bound(); f_t get_lower_bound(); - i_t get_heap_size(); bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -128,10 +122,17 @@ class branch_and_bound_t { std::vector guess_; // LP relaxation + csr_matrix_t Arow_; lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; + // Variable locks (see definition 3.3 from T. Achterberg, “Constraint Integer Programming,” + // PhD, Technischen Universität Berlin, Berlin, 2007. doi: 10.14279/depositonce-1634). + // Here we assume that the constraints are in the form `Ax = b, l <= x <= u`. + std::vector var_up_locks_; + std::vector var_down_locks_; + // Local lower bounds for each thread std::vector> local_lower_bounds_; @@ -139,23 +140,13 @@ class branch_and_bound_t { omp_mutex_t mutex_upper_; // Global variable for upper bound - f_t upper_bound_; + omp_atomic_t upper_bound_; // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; // Structure with the general info of the solver. - struct stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; - } exploration_stats_; + bnb_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -175,9 +166,8 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes to be explored. - omp_mutex_t mutex_heap_; - mip_node_heap_t*> heap_; + // Heap storing the nodes waiting to be explored. + node_queue_t node_queue_; // Search tree search_tree_t search_tree_; @@ -185,13 +175,9 @@ class branch_and_bound_t { // Count the number of subtrees that are currently being explored. omp_atomic_t active_subtrees_; - // Queue for storing the promising node for performing dives. - omp_mutex_t mutex_dive_queue_; - diving_queue_t diving_queue_; - i_t min_diving_queue_size_; - // Global status of the solver. - omp_atomic_t solver_status_; + omp_atomic_t solver_status_; + omp_atomic_t is_running{false}; omp_atomic_t should_report_; @@ -200,64 +186,87 @@ class branch_and_bound_t { omp_atomic_t lower_bound_ceiling_; void report_heuristic(f_t obj); - void report(std::string symbol, f_t obj, f_t lower_bound, i_t node_depth); + void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth); // Set the final solution. - mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); + void set_final_solution(mip_solution_t& solution, f_t lower_bound); // Update the incumbent solution with the new feasible solution // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type); + bnb_worker_type_t thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); // Ramp-up phase of the solver, where we greedily expand the tree until // there is enough unexplored nodes. This is done recursively using OpenMP tasks. - void exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, - i_t initial_heap_size); - - // Explore the search tree using the best-first search with plunging strategy. - void explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); + void exploration_ramp_up(mip_node_t* node, i_t initial_heap_size); + + // We use best-first to pick the `start_node` and then perform a depth-first search + // from this node (i.e., a plunge). It can only backtrack to a sibling node. + // Unexplored nodes in the subtree are inserted back into the global heap. + void plunge_from(i_t task_id, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list); // Each "main" thread pops a node from the global heap and then performs a plunge // (i.e., a shallow dive) into the subtree determined by the node. - void best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow); + void best_first_thread(i_t task_id); + + // Perform a deep dive in the subtree determined by the `start_node` in order + // to find integer feasible solutions. + void dive_from(mip_node_t& start_node, + const std::vector& start_lower, + const std::vector& start_upper, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + bnb_worker_type_t diving_type); // Each diving thread pops the first node from the dive queue and then performs // a deep dive into the subtree determined by the node. - void diving_thread(const csr_matrix_t& Arow); + void diving_thread(bnb_worker_type_t diving_type); - // Solve the LP relaxation of a leaf node and update the tree. - node_solve_info_t solve_node(mip_node_t* node_ptr, - search_tree_t& search_tree, + // Solve the LP relaxation of a leaf node + dual::status_t solve_node_lp(mip_node_t* node_ptr, lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, basis_update_mpf_t& basis_factors, std::vector& basic_list, std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, - thread_type_t thread_type, - bool recompute_basis_and_bounds, + bnb_worker_type_t thread_type, + bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log); - // Sort the children based on the Martin's criteria. - rounding_direction_t child_selection(mip_node_t* node_ptr); + // Update the tree based on the LP relaxation. Returns the status + // of the node and, if appropriated, the preferred rounding direction + // when visiting the children. + std::pair update_tree(mip_node_t* node_ptr, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + bnb_worker_type_t thread_type, + dual::status_t lp_status, + logger_t& log); + + // Selects the variable to branch on. + branch_variable_t variable_selection(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + bnb_worker_type_t type); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 23d9a0e8e..8ee3fb0ce 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -785,8 +785,15 @@ i_t primal_push(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair( - lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1132,7 +1139,15 @@ crossover_status_t crossover(const lp_problem_t& lp, rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1323,7 +1338,15 @@ crossover_status_t crossover(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/diving_heuristics.cpp b/cpp/src/dual_simplex/diving_heuristics.cpp new file mode 100644 index 000000000..a56b4cce3 --- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.cpp @@ -0,0 +1,306 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + constexpr f_t eps = 1e-6; + i_t branch_var = -1; + f_t min_score = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + + for (i_t j : fractional) { + f_t score = inf; + rounding_direction_t dir = rounding_direction_t::NONE; + + if (solution[j] < root_solution[j] - eps) { + f_t f = solution[j] - std::floor(solution[j]); + f_t d = root_solution[j] - solution[j]; + score = f / d; + dir = rounding_direction_t::DOWN; + + } else if (solution[j] > root_solution[j] + eps) { + f_t f = std::ceil(solution[j]) - solution[j]; + f_t d = solution[j] - root_solution[j]; + score = f / d; + dir = rounding_direction_t::UP; + } + + if (min_score > score) { + min_score = score; + branch_var = j; + round_dir = dir; + } + } + + // If the current solution is equal to the root solution, arbitrarily + // set the branch variable to the first fractional variable and round it down + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Line search diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + min_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + std::lock_guard lock(pc.mutex); + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t j : fractional) { + rounding_direction_t dir = rounding_direction_t::NONE; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + f_t score = 0; + + if (solution[j] < root_solution[j] - 0.4) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (solution[j] > root_solution[j] + 0.4) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (f_down < 0.3) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (f_down > 0.7) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else { + score = score_up; + dir = rounding_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Pseudocost diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log) +{ + std::lock_guard lock(pc.mutex); + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + rounding_direction_t dir = + down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + + f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == rounding_direction_t::DOWN ? pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Guided diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks) +{ + constexpr f_t eps = 1E-6; + up_locks.assign(lp_problem.num_cols, 0); + down_locks.assign(lp_problem.num_cols, 0); + + for (i_t j = 0; j < lp_problem.num_cols; ++j) { + i_t start = lp_problem.A.col_start[j]; + i_t end = lp_problem.A.col_start[j + 1]; + + for (i_t p = start; p < end; ++p) { + f_t val = lp_problem.A.x[p]; + if (std::abs(val) > eps) { + up_locks[j]++; + down_locks[j]++; + } + } + } +} + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log) +{ + i_t branch_var = -1; + i_t min_locks = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + i_t up_lock = up_locks[j]; + i_t down_lock = down_locks[j]; + f_t upper = lp_problem.upper[j]; + f_t lower = lp_problem.lower[j]; + if (std::isfinite(upper)) { up_lock++; } + if (std::isfinite(lower)) { down_lock++; } + i_t alpha = std::min(up_lock, down_lock); + + if (min_locks > alpha) { + min_locks = alpha; + branch_var = j; + + if (up_lock < down_lock) { + round_dir = rounding_direction_t::UP; + } else if (up_lock > down_lock) { + round_dir = rounding_direction_t::DOWN; + } else if (f_down < f_up + eps) { + round_dir = rounding_direction_t::DOWN; + } else { + round_dir = rounding_direction_t::UP; + } + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug( + "Coefficient diving: selected var %d with val = %e, round dir = %d and min locks = %d\n", + branch_var, + solution[branch_var], + round_dir, + min_locks); + + return {branch_var, round_dir}; +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +template void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks); + +template branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log); +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_heuristics.hpp b/cpp/src/dual_simplex/diving_heuristics.hpp new file mode 100644 index 000000000..3c6d77c04 --- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.hpp @@ -0,0 +1,58 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct branch_variable_t { + i_t variable; + rounding_direction_t direction; +}; + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +// Calculate the variable locks assuming that the constraints +// has the following format: `Ax = b`. +template +void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks); + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log); + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_queue.hpp b/cpp/src/dual_simplex/diving_queue.hpp deleted file mode 100644 index f7035109e..000000000 --- a/cpp/src/dual_simplex/diving_queue.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include - -#include - -namespace cuopt::linear_programming::dual_simplex { - -template -struct diving_root_t { - mip_node_t node; - std::vector lower; - std::vector upper; - - diving_root_t(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - : node(std::move(node)), lower(std::move(lower)), upper(std::move(upper)) - { - } - - friend bool operator>(const diving_root_t& a, const diving_root_t& b) - { - return a.node.lower_bound > b.node.lower_bound; - } -}; - -// A min-heap for storing the starting nodes for the dives. -// This has a maximum size of 1024, such that the container -// will discard the least promising node if the queue is full. -template -class diving_queue_t { - private: - std::vector> buffer; - static constexpr i_t max_size_ = 1024; - - public: - diving_queue_t() { buffer.reserve(max_size_); } - - void push(diving_root_t&& node) - { - buffer.push_back(std::move(node)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - void emplace(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - { - buffer.emplace_back(std::move(node), std::move(lower), std::move(upper)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - diving_root_t pop() - { - std::pop_heap(buffer.begin(), buffer.end(), std::greater<>()); - diving_root_t node = std::move(buffer.back()); - buffer.pop_back(); - return node; - } - - i_t size() const { return buffer.size(); } - constexpr i_t max_size() const { return max_size_; } - const diving_root_t& top() const { return buffer.front(); } - void clear() { buffer.clear(); } -}; - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 1d66a21f7..de147132a 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,6 +45,7 @@ class mip_node_t { branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), + objective_estimate(std::numeric_limits::infinity()), vstatus(0) { children[0] = nullptr; @@ -59,6 +60,7 @@ class mip_node_t { node_id(0), branch_var(-1), branch_dir(rounding_direction_t::NONE), + objective_estimate(std::numeric_limits::infinity()), vstatus(basis) { children[0] = nullptr; @@ -80,6 +82,7 @@ class mip_node_t { branch_var(branch_variable), branch_dir(branch_direction), fractional_val(branch_var_value), + objective_estimate(parent_node->objective_estimate), vstatus(basis) { @@ -227,17 +230,19 @@ class mip_node_t { mip_node_t detach_copy() const { mip_node_t copy(lower_bound, vstatus); - copy.branch_var = branch_var; - copy.branch_dir = branch_dir; - copy.branch_var_lower = branch_var_lower; - copy.branch_var_upper = branch_var_upper; - copy.fractional_val = fractional_val; - copy.node_id = node_id; + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.objective_estimate = objective_estimate; + copy.node_id = node_id; return copy; } node_status_t status; f_t lower_bound; + f_t objective_estimate; i_t depth; i_t node_id; i_t branch_var; @@ -262,22 +267,6 @@ void remove_fathomed_nodes(std::vector*>& stack) } } -template -class node_compare_t { - public: - bool operator()(const mip_node_t& a, const mip_node_t& b) const - { - return a.lower_bound > - b.lower_bound; // True if a comes before b, elements that come before are output last - } - - bool operator()(const mip_node_t* a, const mip_node_t* b) const - { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - } -}; - template class search_tree_t { public: diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp new file mode 100644 index 000000000..28072795a --- /dev/null +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -0,0 +1,162 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// This is a generic heap implementation based +// on the STL functions. The main benefit here is +// that we access the underlying container. +template +class heap_t { + public: + heap_t() = default; + virtual ~heap_t() = default; + + void push(const T& node) + { + buffer.push_back(node); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + void push(T&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + template + void emplace(Args&&... args) + { + buffer.emplace_back(std::forward(args)...); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + std::optional pop() + { + if (buffer.empty()) return std::nullopt; + + std::pop_heap(buffer.begin(), buffer.end(), comp); + T node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + size_t size() const { return buffer.size(); } + T& top() { return buffer.front(); } + void clear() { buffer.clear(); } + bool empty() const { return buffer.empty(); } + + private: + std::vector buffer; + Comp comp; +}; + +// A queue storing the nodes waiting to be explored/dived from. +template +class node_queue_t { + private: + struct heap_entry_t { + mip_node_t* node = nullptr; + f_t lower_bound = -inf; + f_t score = inf; + + heap_entry_t(mip_node_t* new_node) + : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) + { + } + }; + + // Comparision function for ordering the nodes based on their lower bound with + // lowest one being explored first. + struct lower_bound_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->lower_bound > b->lower_bound; + } + }; + + // Comparision function for ordering the nodes based on some score (currently the pseudocost + // estimate) with the lowest being explored first. + struct score_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->score > b->score; + } + }; + + heap_t, lower_bound_comp> best_first_heap; + heap_t, score_comp> diving_heap; + omp_mutex_t mutex; + + public: + void push(mip_node_t* new_node) + { + std::lock_guard lock(mutex); + auto entry = std::make_shared(new_node); + best_first_heap.push(entry); + diving_heap.push(entry); + } + + std::optional*> pop_best_first() + { + auto entry = best_first_heap.pop(); + if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } + return std::nullopt; + } + + std::optional*> pop_diving() + { + while (!diving_heap.empty()) { + auto entry = diving_heap.pop(); + if (entry.has_value()) { + if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } + } + } + return std::nullopt; + } + + void lock() { mutex.lock(); } + + void unlock() { mutex.unlock(); } + + i_t diving_queue_size() + { + std::lock_guard lock(mutex); + return diving_heap.size(); + } + + i_t best_first_queue_size() + { + std::lock_guard lock(mutex); + return best_first_heap.size(); + } + + f_t get_lower_bound() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; + } + + mip_node_t* bfs_top() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? nullptr : best_first_heap.top()->node; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 56298ef4d..2bc00f636 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -623,14 +623,17 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& x, std::vector& squared_infeasibilities, - std::vector& infeasibility_indices) + std::vector& infeasibility_indices, + f_t& primal_inf) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - squared_infeasibilities.resize(n, 0.0); + squared_infeasibilities.resize(n); + std::fill(squared_infeasibilities.begin(), squared_infeasibilities.end(), 0.0); infeasibility_indices.reserve(n); infeasibility_indices.clear(); - f_t primal_inf = 0.0; + f_t primal_inf_squared = 0.0; + primal_inf = 0.0; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; const f_t lower_infeas = lp.lower[j] - x[j]; @@ -640,10 +643,11 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const f_t square_infeas = infeas * infeas; squared_infeasibilities[j] = square_infeas; infeasibility_indices.push_back(j); - primal_inf += square_infeas; + primal_inf_squared += square_infeas; + primal_inf += infeas; } } - return primal_inf; + return primal_inf_squared; } template @@ -2241,7 +2245,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > + 0) { return dual::status_t::NUMERICAL; } @@ -2268,7 +2273,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #ifdef COMPUTE_DUAL_RESIDUAL std::vector dual_res1; - compute_dual_residual(lp.A, objective, y, z, dual_res1); + phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.tight_tol) { settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm); @@ -2357,8 +2362,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector bounded_variables(n, 0); phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); - f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + f_t primal_infeasibility; + f_t primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); @@ -2556,9 +2568,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; @@ -2593,9 +2611,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); @@ -2810,7 +2834,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, delta_xB_0_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update primal infeasibilities due to changes in basic variables // from the leaving and entering variables phase2::update_primal_infeasibilities(lp, @@ -2822,7 +2846,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, scaled_delta_xB_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update the entering variable phase2::update_single_primal_infeasibility(lp.lower, lp.upper, @@ -2883,14 +2907,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif if (should_refactor) { bool should_recompute_x = false; - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { should_recompute_x = true; settings.log.printf("Failed to factorize basis. Iteration %d\n", iter); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } i_t count = 0; i_t deficient_size; - while ((deficient_size = - ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus)) > 0) { + while ((deficient_size = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n", iter, static_cast(deficient_size)); @@ -2912,8 +2937,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; } - phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); @@ -2951,7 +2982,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, iter, compute_user_objective(lp, obj), infeasibility_indices.size(), - primal_infeasibility, + primal_infeasibility_squared, sum_perturb, now); } diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 80406dcf0..69f15ba18 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -298,7 +298,15 @@ primal::status_t primal_phase2(i_t phase, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 9f84e108d..0a8f660e9 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -148,8 +148,8 @@ void strong_branching(const lp_problem_t& original_lp, pseudo_costs_t& pc) { pc.resize(original_lp.num_cols); - pc.strong_branch_down.resize(fractional.size()); - pc.strong_branch_up.resize(fractional.size()); + pc.strong_branch_down.assign(fractional.size(), 0); + pc.strong_branch_up.assign(fractional.size(), 0); pc.num_strong_branches_completed = 0; settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", @@ -199,7 +199,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - mutex.lock(); + std::lock_guard lock(mutex); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -211,7 +211,6 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } - mutex.unlock(); } template @@ -258,7 +257,7 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const std::vector& solution, logger_t& log) { - mutex.lock(); + std::lock_guard lock(mutex); const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); @@ -312,11 +311,53 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio log.printf( "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]); - mutex.unlock(); - return branch_var; } +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) +{ + std::lock_guard lock(mutex); + + const i_t num_fractional = fractional.size(); + f_t estimate = lower_bound; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t k = 0; k < num_fractional; k++) { + const i_t j = fractional[k]; + f_t pseudo_cost_down = 0; + f_t pseudo_cost_up = 0; + + if (pseudo_cost_num_down[j] != 0) { + pseudo_cost_down = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } else { + pseudo_cost_down = pseudo_cost_down_avg; + } + + if (pseudo_cost_num_up[j] != 0) { + pseudo_cost_up = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } else { + pseudo_cost_up = pseudo_cost_up_avg; + } + constexpr f_t eps = 1e-6; + const f_t f_down = solution[j] - std::floor(solution[j]); + const f_t f_up = std::ceil(solution[j]) - solution[j]; + estimate += std::min(pseudo_cost_down * f_down, pseudo_cost_up * f_up); + } + + log.printf("pseudocost estimate = %e\n", estimate); + return estimate; +} + template void pseudo_costs_t::update_pseudo_costs_from_strong_branching( const std::vector& fractional, const std::vector& root_soln) diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 799cdc3ff..4bab438fa 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -32,10 +32,10 @@ class pseudo_costs_t { void resize(i_t num_variables) { - pseudo_cost_sum_down.resize(num_variables); - pseudo_cost_sum_up.resize(num_variables); - pseudo_cost_num_down.resize(num_variables); - pseudo_cost_num_up.resize(num_variables); + pseudo_cost_sum_down.assign(num_variables, 0); + pseudo_cost_sum_up.assign(num_variables, 0); + pseudo_cost_num_down.assign(num_variables, 0); + pseudo_cost_num_up.assign(num_variables, 0); } void initialized(i_t& num_initialized_down, @@ -43,6 +43,11 @@ class pseudo_costs_t { f_t& pseudo_cost_down_avg, f_t& pseudo_cost_up_avg) const; + f_t obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log); + i_t variable_selection(const std::vector& fractional, const std::vector& solution, logger_t& log); diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index a1cc049e7..d86f84c39 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,12 +13,29 @@ #include #include #include +#include #include #include #include namespace cuopt::linear_programming::dual_simplex { +template +struct diving_heuristics_settings_t { + i_t num_diving_workers = -1; + + // -1 automatic, 0 disabled, 1 enabled + i_t line_search_diving = -1; + i_t pseudocost_diving = -1; + i_t guided_diving = -1; + i_t coefficient_diving = -1; + + i_t min_node_depth = 10; + i_t node_limit = 500; + f_t iteration_limit_factor = 0.05; + i_t backtrack_limit = 5; +}; + template struct simplex_solver_settings_t { public: @@ -70,14 +87,14 @@ struct simplex_solver_settings_t { iteration_log_frequency(1000), first_iteration_log(2), num_threads(omp_get_max_threads() - 1), - num_bfs_threads(std::min(num_threads / 4, 1)), - num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), + num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), inside_mip(0), solution_callback(nullptr), heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) { + diving_settings.num_diving_workers = std::max(num_threads - num_bfs_workers, 1); } void set_log(bool logging) const { log.log = logging; } @@ -137,8 +154,10 @@ struct simplex_solver_settings_t { i_t first_iteration_log; // number of iterations to log at beginning of solve i_t num_threads; // number of threads to use i_t random_seed; // random seed - i_t num_bfs_threads; // number of threads dedicated to the best-first search - i_t num_diving_threads; // number of threads dedicated to diving + i_t num_bfs_workers; // number of threads dedicated to the best-first search + + diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index 537f49cea..7456b59ed 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -256,13 +256,19 @@ void rins_t::run_rins() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = std::min(current_mip_gap, (f_t)settings.target_mip_gap); - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; - branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 2; + branch_and_bound_settings.num_bfs_workers = 1; + + // In the future, let RINS use all the diving heuristics. For now, + // restricting to guided diving. + branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.line_search_diving = 0; + branch_and_bound_settings.diving_settings.coefficient_diving = 0; + branch_and_bound_settings.diving_settings.pseudocost_diving = 0; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); }; diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 5be807372..82670437a 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -103,9 +103,15 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.num_bfs_workers = 1; + + // In the future, let SubMIP use all the diving heuristics. For now, + // restricting to guided diving. + branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.line_search_diving = 0; + branch_and_bound_settings.diving_settings.coefficient_diving = 0; + branch_and_bound_settings.diving_settings.pseudocost_diving = 0; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; diff --git a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh index 2906e648f..5cd219ec3 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh +++ b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -32,6 +32,7 @@ __global__ void simple_rounding_kernel(typename solution_t::view_t sol auto cstr_idx = solution.problem.reverse_constraints[i]; auto cstr_coeff = solution.problem.reverse_coefficients[i]; + // Here, we are storing the constraints in the following format: u <= Ax <= l // boxed constraint. can't be rounded safely if (std::isfinite(solution.problem.constraint_lower_bounds[cstr_idx]) && std::isfinite(solution.problem.constraint_upper_bounds[cstr_idx])) { diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 7311a26fd..08e1806b9 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -173,13 +173,12 @@ solution_t mip_solver_t::run_solver() } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } - CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); - i_t num_threads = branch_and_bound_settings.num_threads; - i_t num_bfs_threads = std::max(1, num_threads / 4); - i_t num_diving_threads = std::max(1, num_threads - num_bfs_threads); - branch_and_bound_settings.num_bfs_threads = num_bfs_threads; - branch_and_bound_settings.num_diving_threads = num_diving_threads; + i_t num_threads = branch_and_bound_settings.num_threads; + i_t num_bfs_workers = std::max(1, num_threads / 4); + i_t num_diving_workers = std::max(1, num_threads - num_bfs_workers); + branch_and_bound_settings.num_bfs_workers = num_bfs_workers; + branch_and_bound_settings.diving_settings.num_diving_workers = num_diving_workers; // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 33eda66cb..e1b68bf88 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -53,7 +53,7 @@ class omp_atomic_t { T operator--() { return fetch_sub(T(1)) - 1; } T operator--(int) { return fetch_sub(T(1)); } - T load() + T load() const { T res; #pragma omp atomic read