From 52054719037b1db304862c4b0963dc58e01de491 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 17 Nov 2025 14:10:28 -0800
Subject: [PATCH 01/27] Add constraints and adjust dual simplex to warm start
 from the current basis

---
 cpp/src/dual_simplex/basis_updates.cpp      | 157 ++++++++++++++++
 cpp/src/dual_simplex/basis_updates.hpp      |   2 +
 cpp/src/dual_simplex/solve.cpp              | 191 ++++++++++++++++++++
 cpp/src/dual_simplex/solve.hpp              |  13 ++
 cpp/src/dual_simplex/sparse_matrix.cpp      |  45 +++++
 cpp/src/dual_simplex/sparse_matrix.hpp      |   2 +
 cpp/src/dual_simplex/sparse_vector.cpp      |  15 ++
 cpp/src/dual_simplex/sparse_vector.hpp      |   2 +
 cpp/tests/dual_simplex/unit_tests/solve.cpp | 108 +++++++++++
 9 files changed, 535 insertions(+)
diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 3e16411f4..2a8c34d1c 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1108,6 +1108,163 @@ i_t basis_update_t<i_t, f_t>::lower_triangular_multiply(const csc_matrix_t<i_t,
   return new_nz;
 }
 
+// Start of middle product form: basis_update_mpf_t
+
+template <typename i_t, typename f_t>
+i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic)
+{
+  const i_t m = L0_.m;
+
+  // Solve for U^T W^T = C_B^T
+  // We do this one row at a time of C_B
+  csc_matrix_t<i_t, f_t> WT(m, cuts_basic.m, 0);
+  printf("Constructing WT\n");
+
+  i_t WT_nz = 0;
+  for (i_t k = 0; k < cuts_basic.m; k++) {
+    sparse_vector_t<i_t, f_t> rhs(cuts_basic, k);
+    u_transpose_solve(rhs);
+    WT.col_start[k] = WT_nz;
+    for (i_t q = 0; q < rhs.i.size(); q++) {
+      WT.i.push_back(rhs.i[q]);
+      WT.x.push_back(rhs.x[q]);
+      WT_nz++;
+    }
+  }
+  WT.col_start[cuts_basic.m] = WT_nz;
+
+  printf("Constructing V (num updates %d)\n", num_updates_);
+
+  csc_matrix_t<i_t, f_t> V(cuts_basic.m, m, 0);
+  if (num_updates_ > 0) {
+    // W = V T_0 ... T_{num_updates_ - 1}
+    // or V = W T_{num_updates_ - 1}^{-1} ... T_0^{-1}
+    // or V^T = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T
+    // We can compute V^T column by column so that we have
+    // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // or
+    // V(h, :) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+
+    csr_matrix_t<i_t, f_t> V_row(cuts_basic.m, m, 0);
+    i_t V_nz = 0;
+    const f_t zero_tol = 1e-13;
+    for (i_t h = 0; h < cuts_basic.m; h++) {
+       sparse_vector_t rhs(WT, h);
+       scatter_into_workspace(rhs);
+       i_t nz = rhs.i.size();
+       for (i_t k = num_updates_ - 1; k >= 0; --k) {
+        // T_k^{-T} = ( I - v u^T/(1 + u^T v))
+        // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu
+
+        const i_t u_col = 2 * k;
+        const i_t v_col = 2 * k + 1;
+        const f_t mu    = mu_values_[k];
+
+        // dot = u^T * b
+        f_t dot = dot_product(u_col, xi_workspace_, x_workspace_);
+        const f_t theta = dot / mu;
+        if (std::abs(theta) > zero_tol) {
+          add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_);
+        }
+      }
+      gather_into_sparse_vector(nz, rhs);
+      V_row.row_start[h] = V_nz;
+      for (i_t q = 0; q < rhs.i.size(); q++) {
+        V_row.j.push_back(rhs.i[q]);
+        V_row.x.push_back(rhs.x[q]);
+        V_nz++;
+      }
+    }
+    V_row.row_start[cuts_basic.m] = V_nz;
+
+    V_row.to_compressed_col(V);
+  }
+  else
+  {
+    // W = V
+    WT.transpose(V);
+  }
+
+  // Extend u_i, v_i for i = 0, ..., num_updates_ - 1
+  S_.m += cuts_basic.m;
+
+  // Adjust L and U
+  // L = [ L0  0 ]
+  //     [ V   I ]
+  printf("Adjusting L\n");
+
+  i_t V_nz = V.col_start[m];
+  i_t L_nz = L0_.col_start[m];
+  csc_matrix_t<i_t, f_t> new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m);
+  L_nz = 0;
+  for (i_t j = 0; j < m; ++j) {
+    new_L.col_start[j] = L_nz;
+    const i_t col_start = L0_.col_start[j];
+    const i_t col_end = L0_.col_start[j + 1];
+    for (i_t p = col_start; p < col_end; ++p) {
+      new_L.i[L_nz] = L0_.i[p];
+      new_L.x[L_nz] = L0_.x[p];
+      L_nz++;
+    }
+    const i_t V_col_start = V.col_start[j];
+    const i_t V_col_end = V.col_start[j + 1];
+    for (i_t p = V_col_start; p < V_col_end; ++p) {
+      new_L.i[L_nz] = V.i[p] + m;
+      new_L.x[L_nz] = V.x[p];
+      L_nz++;
+    }
+  }
+  for (i_t j = m; j < m + cuts_basic.m; ++j) {
+    new_L.col_start[j] = L_nz;
+    new_L.i[L_nz] = j;
+    new_L.x[L_nz] = 1.0;
+    L_nz++;
+  }
+  new_L.col_start[m + cuts_basic.m] = L_nz;
+
+  L0_ = new_L;
+
+
+  // Adjust U
+  // U = [ U0 0 ]
+  //     [ 0  I ]
+  printf("Adjusting U\n");
+
+  i_t U_nz = U0_.col_start[m];
+  U0_.col_start.resize(m + cuts_basic.m + 1);
+  U0_.i.resize(U_nz + cuts_basic.m);
+  U0_.x.resize(U_nz + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    U0_.col_start[k] = U_nz;
+    U0_.i[U_nz] = k;
+    U0_.x[U_nz] = 1.0;
+    U_nz++;
+  }
+  U0_.col_start[m + cuts_basic.m] = U_nz;
+  U0_.n = m + cuts_basic.m;
+  U0_.m = m + cuts_basic.m;
+
+  printf("Computing transposes\n");
+  compute_transposes();
+
+
+  // Adjust row_permutation_ and inverse_row_permutation_
+  printf("Adjusting row_permutation_ and inverse_row_permutation_\n");
+  row_permutation_.resize(m + cuts_basic.m);
+  inverse_row_permutation_.resize(m + cuts_basic.m);
+  for (i_t k = m; k < m + cuts_basic.m; ++k) {
+    row_permutation_[k] = k;
+  }
+  inverse_permutation(row_permutation_, inverse_row_permutation_);
+
+  // Adjust workspace sizes
+  printf("Adjusting workspace sizes\n");
+  xi_workspace_.resize(2 * (m + cuts_basic.m), 0);
+  x_workspace_.resize(m + cuts_basic.m, 0.0);
+
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 void basis_update_mpf_t<i_t, f_t>::gather_into_sparse_vector(i_t nz,
                                                              sparse_vector_t<i_t, f_t>& out) const
diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp
index 078dfffeb..283a1513e 100644
--- a/cpp/src/dual_simplex/basis_updates.hpp
+++ b/cpp/src/dual_simplex/basis_updates.hpp
@@ -291,6 +291,8 @@ class basis_update_mpf_t {
     reset_stats();
   }
 
+  i_t append_cuts(const csr_matrix_t<i_t, f_t>& cuts_basic);
+
   f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const
   {
     num_calls++;
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 5c5f9e165..5aa74906d 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -293,6 +293,184 @@ lp_status_t solve_linear_program_with_advanced_basis(
   return lp_status;
 }
 
+template <typename i_t, typename f_t>
+lp_status_t solve_linear_program_with_cuts(
+  const f_t start_time,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  const csr_matrix_t<i_t, f_t>& cuts,
+  const std::vector<f_t>& cut_rhs,
+  lp_problem_t<i_t, f_t>& lp,
+  lp_solution_t<i_t, f_t>& solution,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  std::vector<i_t>& basic_list,
+  std::vector<i_t>& nonbasic_list,
+  std::vector<variable_status_t>& vstatus,
+  std::vector<f_t>& edge_norms) {
+  // Given a set of cuts: C*x <= d that are currently violated
+  // by the current solution x* (i.e. C*x* > d), this function
+  // adds the cuts into the LP and solves again.
+
+  const i_t p = cuts.m;
+  if (cut_rhs.size() != static_cast<size_t>(p)) {
+    settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
+    return lp_status_t::NUMERICAL_ISSUES;
+  }
+  printf("Number of cuts %d\n", p);
+  printf("Original lp rows %d\n", lp.num_rows);
+  printf("Original lp cols %d\n", lp.num_cols);
+
+  csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
+  printf("Converting A to compressed row\n");
+  lp.A.to_compressed_row(new_A_row);
+
+
+  printf("Appening cuts\n");
+  new_A_row.append_rows(cuts);
+
+  printf("Converting back to compressed column\n");
+  csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
+  new_A_row.to_compressed_col(new_A_col);
+  printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
+
+
+  printf("Adding slacks\n");
+  // Add in slacks variables for the new rows
+  lp.lower.resize(lp.num_cols + p);
+  lp.upper.resize(lp.num_cols + p);
+  lp.objective.resize(lp.num_cols + p);
+  i_t nz = new_A_col.col_start[lp.num_cols];
+  new_A_col.col_start.resize(lp.num_cols + p + 1);
+  new_A_col.i.resize(nz + p);
+  new_A_col.x.resize(nz + p);
+  i_t k = lp.num_rows;
+  for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) {
+    new_A_col.col_start[j] = nz;
+    new_A_col.i[nz] = k++;
+    new_A_col.x[nz] = 1.0;
+    nz++;
+    lp.lower[j] = 0.0;
+    lp.upper[j] = inf;
+    lp.objective[j] = 0.0;
+  }
+  new_A_col.col_start[lp.num_cols + p] = nz;
+  new_A_col.n = lp.num_cols + p;
+  printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
+  printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]);
+
+  lp.A = new_A_col;
+  i_t old_rows = lp.num_rows;
+  lp.num_rows += p;
+  printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m);
+  i_t old_cols = lp.num_cols;
+  lp.num_cols += p;
+  printf("lp cols %d A cols %d\n", lp.num_cols, lp.A.n);
+
+  printf("New A matrix\n");
+  lp.A.print_matrix(stdout);
+
+  printf("Adding rhs\n");
+  lp.rhs.resize(lp.num_rows);
+  for (i_t k = old_rows; k < old_rows + p; k++) {
+    const i_t h = k - old_rows;
+    lp.rhs[k] = cut_rhs[h];
+  }
+
+
+  printf("Constructing column degree\n");
+  // Construct C_B = C(:, basic_list)
+  std::vector<i_t> C_col_degree(p, 0);
+  // cuts.j holds LP column indices, so the degree counters need one slot per
+  // column of the pre-cut LP; the p slots allocated above are too few.
+  C_col_degree.assign(old_cols, 0);
+  const i_t cuts_nz = cuts.row_start[p];
+  for (i_t q = 0; q < cuts_nz; q++) { C_col_degree[cuts.j[q]]++; }
+
+  std::vector<i_t> in_basis(old_cols, 0);
+  const i_t num_basic = static_cast<i_t>(basic_list.size());
+  i_t C_B_nz = 0;
+  for (i_t k = 0; k < num_basic; k++) {
+    const i_t j = basic_list[k];
+    in_basis[j] = 1;
+    in_basis[j] = k + 1;  // overwrite the flag with the 1-based basis position of column j
+    C_B_nz += C_col_degree[j];
+  }
+  printf("Constructing C_B\n");
+  csr_matrix_t<i_t, f_t> C_B(num_basic, num_basic, C_B_nz);
+  nz = 0;
+  for (i_t i = 0; i < p; i++) {
+    C_B.row_start[i] = nz;
+    const i_t row_start = cuts.row_start[i];
+    const i_t row_end = cuts.row_start[i+1];
+    for (i_t q = row_start; q < row_end; q++) {
+      const i_t j = cuts.j[q];
+      if (in_basis[j] == 0) { continue; }
+      C_B.j[nz] = in_basis[j] - 1;  // C_B = C(:, basic_list): index by basis position
+      C_B.x[nz] = cuts.x[q];
+      nz++;
+    }
+  }
+  C_B.row_start[p] = nz;
+  settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
+  if (nz != C_B_nz) {
+    exit(1);
+  }
+
+  printf("Adjusting basis update\n");
+  // Adjust the basis update to include the new cuts
+  basis_update.append_cuts(C_B);
+
+
+  // Adjust the vstatus
+  vstatus.resize(lp.num_cols);
+  for (i_t j = old_cols; j < lp.num_cols; j++) {
+    vstatus[j] = variable_status_t::BASIC;
+  }
+
+  basic_list.resize(lp.num_rows, 0);
+  i_t h = old_cols;
+  for (i_t j = old_rows; j < lp.num_rows; j++) {
+    basic_list[j] = h++;
+  }
+
+  printf("basic list\n");
+  for (i_t k = 0; k < basic_list.size(); k++) {
+    printf("%d ", basic_list[k]);
+  }
+  printf("\n");
+
+  // Adjust the solution
+  solution.x.resize(lp.num_cols, 0.0);
+  solution.y.resize(lp.num_rows, 0.0);
+  solution.z.resize(lp.num_cols, 0.0);
+
+  // For now just clear the edge norms
+  edge_norms.clear();
+  i_t iter = 0;
+  dual::status_t status = dual_phase2_with_advanced_basis(2,
+                                                          0,
+                                                          false,
+                                                          start_time,
+                                                          lp,
+                                                          settings,
+                                                          vstatus,
+                                                          basis_update,
+                                                          basic_list,
+                                                          nonbasic_list,
+                                                          solution,
+                                                          iter,
+                                                          edge_norms);
+
+  // Default covers dual::status_t::NUMERICAL and any unmapped status (was uninitialized).
+  lp_status_t lp_status = lp_status_t::NUMERICAL_ISSUES;
+  if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; }
+  if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; }
+  if (status == dual::status_t::TIME_LIMIT) { lp_status = lp_status_t::TIME_LIMIT; }
+  if (status == dual::status_t::ITERATION_LIMIT) { lp_status = lp_status_t::ITERATION_LIMIT; }
+  if (status == dual::status_t::CONCURRENT_LIMIT) { lp_status = lp_status_t::CONCURRENT_LIMIT; }
+  if (status == dual::status_t::CUTOFF) { lp_status = lp_status_t::CUTOFF; }
+  return lp_status;
+}
+
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
                                               const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -661,6 +839,19 @@ template lp_status_t solve_linear_program_with_advanced_basis(
   std::vector<variable_status_t>& vstatus,
   std::vector<double>& edge_norms);
 
+template lp_status_t solve_linear_program_with_cuts(
+  const double start_time,
+  const simplex_solver_settings_t<int, double>& settings,
+  const csr_matrix_t<int, double>& cuts,
+  const std::vector<double>& cut_rhs,
+  lp_problem_t<int, double>& lp,
+  lp_solution_t<int, double>& solution,
+  basis_update_mpf_t<int, double>& basis_update,
+  std::vector<int>& basic_list,
+  std::vector<int>& nonbasic_list,
+  std::vector<variable_status_t>& vstatus,
+  std::vector<double>& edge_norms);
+
 template lp_status_t solve_linear_program_with_barrier(
   const user_problem_t<int, double>& user_problem,
   const simplex_solver_settings_t<int, double>& settings,
diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp
index e96229784..d659d6282 100644
--- a/cpp/src/dual_simplex/solve.hpp
+++ b/cpp/src/dual_simplex/solve.hpp
@@ -61,6 +61,19 @@ lp_status_t solve_linear_program_with_advanced_basis(
   std::vector<variable_status_t>& vstatus,
   std::vector<f_t>& edge_norms);
 
+template <typename i_t, typename f_t>
+lp_status_t solve_linear_program_with_cuts(const f_t start_time,
+                                           const simplex_solver_settings_t<i_t, f_t>& settings,
+                                           const csr_matrix_t<i_t, f_t>& cuts,
+                                           const std::vector<f_t>& cut_rhs,
+                                           lp_problem_t<i_t, f_t>& lp,
+                                           lp_solution_t<i_t, f_t>& solution,
+                                           basis_update_mpf_t<i_t, f_t>& basis_update,
+                                           std::vector<i_t>& basic_list,
+                                           std::vector<i_t>& nonbasic_list,
+                                           std::vector<variable_status_t>& vstatus,
+                                           std::vector<f_t>& edge_norms);
+
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
                                               const simplex_solver_settings_t<i_t, f_t>& settings,
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index cdd45f720..c2fc343ce 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -357,6 +357,51 @@ i_t csc_matrix_t<i_t, f_t>::remove_row(i_t row)
   return 0;
 }
 
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
+{
+  const i_t old_m = this->m;
+  const i_t n = this->n;
+  const i_t old_nz = this->row_start[old_m];
+  const i_t C_row = C.m;
+  if (0 && C.n != n) {
+    printf("C n %d != n %d\n", C.n, n);
+    return -1;
+  }
+  const i_t C_nz = C.row_start[C_row];
+  const i_t new_nz = old_nz + C_nz;
+  const i_t new_m = old_m + C_row;
+  printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m);
+
+  this->j.resize(new_nz);
+  this->x.resize(new_nz);
+  this->row_start.resize(new_m + 1);
+
+  i_t nz = old_nz;
+  for (i_t i = old_m; i < new_m; i++) {
+    const i_t k = i - old_m;
+    const i_t nz_row = C.row_start[k+1] - C.row_start[k];
+    this->row_start[i] = nz;
+    nz += nz_row;
+  }
+  this->row_start[new_m] = nz;
+
+  for (i_t p = old_nz; p < new_nz; p++) {
+    const i_t q = p - old_nz;
+    this->j[p] = C.j[q];
+  }
+
+  for (i_t p = old_nz; p < new_nz; p++) {
+    const i_t q = p - old_nz;
+    this->x[p] = C.x[q];
+  }
+
+  this->m = new_m;
+  this->nz_max = new_nz;
+  return 0;
+}
+
+
 template <typename i_t, typename f_t>
 void csc_matrix_t<i_t, f_t>::print_matrix(FILE* fid) const
 {
diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp
index c14e6d0f1..eefc31b1f 100644
--- a/cpp/src/dual_simplex/sparse_matrix.hpp
+++ b/cpp/src/dual_simplex/sparse_matrix.hpp
@@ -136,6 +136,8 @@ class csr_matrix_t {
   // Create a new matrix with the marked rows removed
   i_t remove_rows(std::vector<i_t>& row_marker, csr_matrix_t<i_t, f_t>& Aout) const;
 
+  i_t append_rows(const csr_matrix_t<i_t, f_t>& C);
+
   // Ensures no repeated column indices within a row
   void check_matrix() const;
 
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index 2d4745650..f33903fc5 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -28,6 +28,21 @@ sparse_vector_t<i_t, f_t>::sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t
   }
 }
 
+template <typename i_t, typename f_t>
+sparse_vector_t<i_t, f_t>::sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row)
+{
+  const i_t row_start = A.row_start[row];
+  const i_t row_end = A.row_start[row+1];
+  const i_t nz = row_end - row_start;
+  n = A.n;
+  i.reserve(nz);
+  x.reserve(nz);
+  for (i_t k = row_start; k < row_end; ++k) {
+    i.push_back(A.j[k]);
+    x.push_back(A.x[k]);
+  }
+}
+
 template <typename i_t, typename f_t>
 void sparse_vector_t<i_t, f_t>::from_dense(const std::vector<f_t>& in)
 {
diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp
index 7acfdc8b5..afa559926 100644
--- a/cpp/src/dual_simplex/sparse_vector.hpp
+++ b/cpp/src/dual_simplex/sparse_vector.hpp
@@ -25,6 +25,8 @@ class sparse_vector_t {
   sparse_vector_t(const std::vector<f_t>& in) { from_dense(in); }
   // Construct a sparse vector from a column of a CSC matrix
   sparse_vector_t(const csc_matrix_t<i_t, f_t>& A, i_t col);
+  // Construct a sparse vector from a row of a CSR matrix
+  sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row);
   // gather a dense vector into a sparse vector
   void from_dense(const std::vector<f_t>& in);
   // convert a sparse vector into a CSC matrix with a single column
diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index 7aed72fe0..0f5c1802b 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -326,4 +326,112 @@ TEST(dual_simplex, dual_variable_greater_than)
   EXPECT_NEAR(solution.z[1], 0.0, 1e-6);
 }
 
+
+TEST(dual_simplex, simple_cuts)
+{
+  // minimize x + y + 2 z
+  // subject to x + y + z == 1
+  //            x, y, z >= 0
+
+  raft::handle_t handle{};
+  cuopt::linear_programming::dual_simplex::user_problem_t<int, double> user_problem(&handle);
+  constexpr int m  = 1;
+  constexpr int n  = 3;
+  constexpr int nz = 3;
+
+  user_problem.num_rows = m;
+  user_problem.num_cols = n;
+  user_problem.objective.resize(n);
+  user_problem.objective[0] = 1.0;
+  user_problem.objective[1] = 1.0;
+  user_problem.objective[2] = 2.0;
+  user_problem.A.m          = m;
+  user_problem.A.n          = n;
+  user_problem.A.nz_max     = nz;
+  user_problem.A.reallocate(nz);
+  user_problem.A.col_start.resize(n + 1);
+  user_problem.A.col_start[0] = 0;
+  user_problem.A.col_start[1] = 1;
+  user_problem.A.col_start[2] = 2;
+  user_problem.A.col_start[3] = 3;
+  user_problem.A.i[0] = 0;
+  user_problem.A.x[0] = 1.0;
+  user_problem.A.i[1] = 0;
+  user_problem.A.x[1] = 1.0;
+  user_problem.A.i[2] = 0;
+  user_problem.A.x[2] = 1.0;
+  user_problem.lower.resize(n, 0.0);
+  user_problem.upper.resize(n, dual_simplex::inf);
+  user_problem.num_range_rows = 0;
+  user_problem.problem_name   = "simple_cuts";
+  user_problem.obj_scale = 1.0;
+  user_problem.obj_constant = 0.0;
+  user_problem.rhs.resize(m, 1.0);
+  user_problem.row_sense.resize(m, 'E');
+  user_problem.var_types.resize(n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS);
+
+  cuopt::init_logger_t logger("", true);
+
+  cuopt::linear_programming::dual_simplex::lp_problem_t<int, double> lp(user_problem.handle_ptr, 1, 1, 1);
+  cuopt::linear_programming::dual_simplex::simplex_solver_settings_t<int, double> settings;
+  settings.barrier = false;
+  settings.barrier_presolve = false;
+  settings.log.log = true;
+  settings.log.log_to_console = true;
+  settings.log.printf("Test print\n");
+  std::vector<int> new_slacks;
+  cuopt::linear_programming::dual_simplex::dualize_info_t<int, double> dualize_info;
+  cuopt::linear_programming::dual_simplex::convert_user_problem(user_problem, settings, lp, new_slacks, dualize_info);
+  cuopt::linear_programming::dual_simplex::lp_solution_t<int, double> solution(lp.num_rows, lp.num_cols);
+  std::vector<cuopt::linear_programming::dual_simplex::variable_status_t> vstatus;
+  std::vector<double> edge_norms;
+  std::vector<int> basic_list(lp.num_rows);
+  std::vector<int> nonbasic_list;
+  cuopt::linear_programming::dual_simplex::basis_update_mpf_t<int, double> basis_update(lp.num_cols, settings.refactor_frequency);
+  double start_time = dual_simplex::tic();
+  printf("Calling solve linear program with advanced basis\n");
+  EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve_linear_program_with_advanced_basis(
+              lp, start_time, settings, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms)),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  printf("Solution y: %e\n", solution.y[0]);
+  printf("Solution z: %e %e %e\n", solution.z[0], solution.z[1], solution.z[2]);
+  EXPECT_NEAR(solution.objective, 1.0, 1e-6);
+  EXPECT_NEAR(solution.x[0], 1.0, 1e-6);
+
+
+  // Add a cut z >= 1/3. Needs to be in the form  C*x <= d
+  csr_matrix_t<int, double> cuts(1, n, 1);
+  cuts.row_start[0] = 0;
+  cuts.j[0] = 2;
+  cuts.x[0] = -1.0;
+  cuts.row_start[1] = 1;
+  printf("cuts m %d n %d\n", cuts.m, cuts.n);
+  std::vector<double> cut_rhs(1);
+  cut_rhs[0] = -1.0 / 3.0;
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(
+            start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);
+
+  cuts.row_start.resize(3);
+  cuts.j[0] = 1;
+  cuts.row_start[2] = 2;
+  cuts.j[1] = 0;
+  cuts.x[1] = 1.0;
+  cuts.m = 2;
+  cut_rhs.resize(2);
+  cut_rhs[1] = 0.0;
+
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(
+            start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms),
+            cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
+  printf("Solution objective: %e\n", solution.objective);
+  printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
+  EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);
+}
+
 }  // namespace cuopt::linear_programming::dual_simplex::test

From 058433717476a417b56ee135d6e7c4c599742324 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 17 Nov 2025 14:11:43 -0800
Subject: [PATCH 02/27] Style fixes

---
 cpp/src/dual_simplex/basis_updates.cpp      | 36 ++++-----
 cpp/src/dual_simplex/solve.cpp              | 54 ++++++-------
 cpp/src/dual_simplex/sparse_matrix.cpp      | 21 +++--
 cpp/src/dual_simplex/sparse_vector.cpp      |  6 +-
 cpp/tests/dual_simplex/unit_tests/solve.cpp | 87 ++++++++++++++-------
 5 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 2a8c34d1c..55a48c2ae 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1146,13 +1146,13 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     // V(h, :) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
 
     csr_matrix_t<i_t, f_t> V_row(cuts_basic.m, m, 0);
-    i_t V_nz = 0;
+    i_t V_nz           = 0;
     const f_t zero_tol = 1e-13;
     for (i_t h = 0; h < cuts_basic.m; h++) {
-       sparse_vector_t rhs(WT, h);
-       scatter_into_workspace(rhs);
-       i_t nz = rhs.i.size();
-       for (i_t k = num_updates_ - 1; k >= 0; --k) {
+      sparse_vector_t rhs(WT, h);
+      scatter_into_workspace(rhs);
+      i_t nz = rhs.i.size();
+      for (i_t k = num_updates_ - 1; k >= 0; --k) {
         // T_k^{-T} = ( I - v u^T/(1 + u^T v))
         // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu
 
@@ -1161,7 +1161,7 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
         const f_t mu    = mu_values_[k];
 
         // dot = u^T * b
-        f_t dot = dot_product(u_col, xi_workspace_, x_workspace_);
+        f_t dot         = dot_product(u_col, xi_workspace_, x_workspace_);
         const f_t theta = dot / mu;
         if (std::abs(theta) > zero_tol) {
           add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_);
@@ -1178,9 +1178,7 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     V_row.row_start[cuts_basic.m] = V_nz;
 
     V_row.to_compressed_col(V);
-  }
-  else
-  {
+  } else {
     // W = V
     WT.transpose(V);
   }
@@ -1198,16 +1196,16 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   csc_matrix_t<i_t, f_t> new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m);
   L_nz = 0;
   for (i_t j = 0; j < m; ++j) {
-    new_L.col_start[j] = L_nz;
+    new_L.col_start[j]  = L_nz;
     const i_t col_start = L0_.col_start[j];
-    const i_t col_end = L0_.col_start[j + 1];
+    const i_t col_end   = L0_.col_start[j + 1];
     for (i_t p = col_start; p < col_end; ++p) {
       new_L.i[L_nz] = L0_.i[p];
       new_L.x[L_nz] = L0_.x[p];
       L_nz++;
     }
     const i_t V_col_start = V.col_start[j];
-    const i_t V_col_end = V.col_start[j + 1];
+    const i_t V_col_end   = V.col_start[j + 1];
     for (i_t p = V_col_start; p < V_col_end; ++p) {
       new_L.i[L_nz] = V.i[p] + m;
       new_L.x[L_nz] = V.x[p];
@@ -1216,15 +1214,14 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   }
   for (i_t j = m; j < m + cuts_basic.m; ++j) {
     new_L.col_start[j] = L_nz;
-    new_L.i[L_nz] = j;
-    new_L.x[L_nz] = 1.0;
+    new_L.i[L_nz]      = j;
+    new_L.x[L_nz]      = 1.0;
     L_nz++;
   }
   new_L.col_start[m + cuts_basic.m] = L_nz;
 
   L0_ = new_L;
 
-
   // Adjust U
   // U = [ U0 0 ]
   //     [ 0  I ]
@@ -1236,18 +1233,17 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   U0_.x.resize(U_nz + cuts_basic.m);
   for (i_t k = m; k < m + cuts_basic.m; ++k) {
     U0_.col_start[k] = U_nz;
-    U0_.i[U_nz] = k;
-    U0_.x[U_nz] = 1.0;
+    U0_.i[U_nz]      = k;
+    U0_.x[U_nz]      = 1.0;
     U_nz++;
   }
   U0_.col_start[m + cuts_basic.m] = U_nz;
-  U0_.n = m + cuts_basic.m;
-  U0_.m = m + cuts_basic.m;
+  U0_.n                           = m + cuts_basic.m;
+  U0_.m                           = m + cuts_basic.m;
 
   printf("Computing transposes\n");
   compute_transposes();
 
-
   // Adjust row_permutation_ and inverse_row_permutation_
   printf("Adjusting row_permutation_ and inverse_row_permutation_\n");
   row_permutation_.resize(m + cuts_basic.m);
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 5aa74906d..2754fa677 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -294,18 +294,18 @@ lp_status_t solve_linear_program_with_advanced_basis(
 }
 
 template <typename i_t, typename f_t>
-lp_status_t solve_linear_program_with_cuts(
-  const f_t start_time,
-  const simplex_solver_settings_t<i_t, f_t>& settings,
-  const csr_matrix_t<i_t, f_t>& cuts,
-  const std::vector<f_t>& cut_rhs,
-  lp_problem_t<i_t, f_t>& lp,
-  lp_solution_t<i_t, f_t>& solution,
-  basis_update_mpf_t<i_t, f_t>& basis_update,
-  std::vector<i_t>& basic_list,
-  std::vector<i_t>& nonbasic_list,
-  std::vector<variable_status_t>& vstatus,
-  std::vector<f_t>& edge_norms) {
+lp_status_t solve_linear_program_with_cuts(const f_t start_time,
+                                           const simplex_solver_settings_t<i_t, f_t>& settings,
+                                           const csr_matrix_t<i_t, f_t>& cuts,
+                                           const std::vector<f_t>& cut_rhs,
+                                           lp_problem_t<i_t, f_t>& lp,
+                                           lp_solution_t<i_t, f_t>& solution,
+                                           basis_update_mpf_t<i_t, f_t>& basis_update,
+                                           std::vector<i_t>& basic_list,
+                                           std::vector<i_t>& nonbasic_list,
+                                           std::vector<variable_status_t>& vstatus,
+                                           std::vector<f_t>& edge_norms)
+{
   // Given a set of cuts: C*x <= d that are currently violated
   // by the current solution x* (i.e. C*x* > d), this function
   // adds the cuts into the LP and solves again.
@@ -323,7 +323,6 @@ lp_status_t solve_linear_program_with_cuts(
   printf("Converting A to compressed row\n");
   lp.A.to_compressed_row(new_A_row);
 
-
   printf("Appening cuts\n");
   new_A_row.append_rows(cuts);
 
@@ -332,7 +331,6 @@ lp_status_t solve_linear_program_with_cuts(
   new_A_row.to_compressed_col(new_A_col);
   printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
 
-
   printf("Adding slacks\n");
   // Add in slacks variables for the new rows
   lp.lower.resize(lp.num_cols + p);
@@ -345,19 +343,19 @@ lp_status_t solve_linear_program_with_cuts(
   i_t k = lp.num_rows;
   for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) {
     new_A_col.col_start[j] = nz;
-    new_A_col.i[nz] = k++;
-    new_A_col.x[nz] = 1.0;
+    new_A_col.i[nz]        = k++;
+    new_A_col.x[nz]        = 1.0;
     nz++;
-    lp.lower[j] = 0.0;
-    lp.upper[j] = inf;
+    lp.lower[j]     = 0.0;
+    lp.upper[j]     = inf;
     lp.objective[j] = 0.0;
   }
   new_A_col.col_start[lp.num_cols + p] = nz;
-  new_A_col.n = lp.num_cols + p;
+  new_A_col.n                          = lp.num_cols + p;
   printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
   printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]);
 
-  lp.A = new_A_col;
+  lp.A         = new_A_col;
   i_t old_rows = lp.num_rows;
   lp.num_rows += p;
   printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m);
@@ -372,10 +370,9 @@ lp_status_t solve_linear_program_with_cuts(
   lp.rhs.resize(lp.num_rows);
   for (i_t k = old_rows; k < old_rows + p; k++) {
     const i_t h = k - old_rows;
-    lp.rhs[k] = cut_rhs[h];
+    lp.rhs[k]   = cut_rhs[h];
   }
 
-
   printf("Constructing column degree\n");
   // Construct C_B = C(:, basic_list)
   std::vector<i_t> C_col_degree(p, 0);
@@ -387,7 +384,7 @@ lp_status_t solve_linear_program_with_cuts(
 
   std::vector<i_t> in_basis(old_cols, 0);
   const i_t num_basic = static_cast<i_t>(basic_list.size());
-  i_t C_B_nz = 0;
+  i_t C_B_nz          = 0;
   for (i_t k = 0; k < num_basic; k++) {
     const i_t j = basic_list[k];
     in_basis[j] = 1;
@@ -398,9 +395,9 @@ lp_status_t solve_linear_program_with_cuts(
   csr_matrix_t<i_t, f_t> C_B(num_basic, num_basic, C_B_nz);
   nz = 0;
   for (i_t i = 0; i < p; i++) {
-    C_B.row_start[i] = nz;
+    C_B.row_start[i]    = nz;
     const i_t row_start = cuts.row_start[i];
-    const i_t row_end = cuts.row_start[i+1];
+    const i_t row_end   = cuts.row_start[i + 1];
     for (i_t q = row_start; q < row_end; q++) {
       const i_t j = cuts.j[q];
       if (in_basis[j] == 0) { continue; }
@@ -411,15 +408,12 @@ lp_status_t solve_linear_program_with_cuts(
   }
   C_B.row_start[p] = nz;
   settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
-  if (nz != C_B_nz) {
-    exit(1);
-  }
+  if (nz != C_B_nz) { exit(1); }
 
   printf("Adjusting basis update\n");
   // Adjust the basis update to include the new cuts
   basis_update.append_cuts(C_B);
 
-
   // Adjust the vstatus
   vstatus.resize(lp.num_cols);
   for (i_t j = old_cols; j < lp.num_cols; j++) {
@@ -445,7 +439,7 @@ lp_status_t solve_linear_program_with_cuts(
 
   // For now just clear the edge norms
   edge_norms.clear();
-  i_t iter = 0;
+  i_t iter              = 0;
   dual::status_t status = dual_phase2_with_advanced_basis(2,
                                                           0,
                                                           false,
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index c2fc343ce..1263fa63e 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -360,17 +360,17 @@ i_t csc_matrix_t<i_t, f_t>::remove_row(i_t row)
 template <typename i_t, typename f_t>
 i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
 {
-  const i_t old_m = this->m;
-  const i_t n = this->n;
+  const i_t old_m  = this->m;
+  const i_t n      = this->n;
   const i_t old_nz = this->row_start[old_m];
-  const i_t C_row = C.m;
+  const i_t C_row  = C.m;
   if (0 && C.n != n) {
     printf("C n %d != n %d\n", C.n, n);
     return -1;
   }
-  const i_t C_nz = C.row_start[C_row];
+  const i_t C_nz   = C.row_start[C_row];
   const i_t new_nz = old_nz + C_nz;
-  const i_t new_m = old_m + C_row;
+  const i_t new_m  = old_m + C_row;
   printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m);
 
   this->j.resize(new_nz);
@@ -379,8 +379,8 @@ i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
 
   i_t nz = old_nz;
   for (i_t i = old_m; i < new_m; i++) {
-    const i_t k = i - old_m;
-    const i_t nz_row = C.row_start[k+1] - C.row_start[k];
+    const i_t k        = i - old_m;
+    const i_t nz_row   = C.row_start[k + 1] - C.row_start[k];
     this->row_start[i] = nz;
     nz += nz_row;
   }
@@ -388,20 +388,19 @@ i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
 
   for (i_t p = old_nz; p < new_nz; p++) {
     const i_t q = p - old_nz;
-    this->j[p] = C.j[q];
+    this->j[p]  = C.j[q];
   }
 
   for (i_t p = old_nz; p < new_nz; p++) {
     const i_t q = p - old_nz;
-    this->x[p] = C.x[q];
+    this->x[p]  = C.x[q];
   }
 
-  this->m = new_m;
+  this->m      = new_m;
   this->nz_max = new_nz;
   return 0;
 }
 
-
 template <typename i_t, typename f_t>
 void csc_matrix_t<i_t, f_t>::print_matrix(FILE* fid) const
 {
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index f33903fc5..46bae286c 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -32,9 +32,9 @@ template <typename i_t, typename f_t>
 sparse_vector_t<i_t, f_t>::sparse_vector_t(const csr_matrix_t<i_t, f_t>& A, i_t row)
 {
   const i_t row_start = A.row_start[row];
-  const i_t row_end = A.row_start[row+1];
-  const i_t nz = row_end - row_start;
-  n = A.n;
+  const i_t row_end   = A.row_start[row + 1];
+  const i_t nz        = row_end - row_start;
+  n                   = A.n;
   i.reserve(nz);
   x.reserve(nz);
   for (i_t k = row_start; k < row_end; ++k) {
diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index 0f5c1802b..41a3a8e62 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -326,7 +326,6 @@ TEST(dual_simplex, dual_variable_greater_than)
   EXPECT_NEAR(solution.z[1], 0.0, 1e-6);
 }
 
-
 TEST(dual_simplex, simple_cuts)
 {
   // minimize x + y + 2 z
@@ -354,44 +353,57 @@ TEST(dual_simplex, simple_cuts)
   user_problem.A.col_start[1] = 1;
   user_problem.A.col_start[2] = 2;
   user_problem.A.col_start[3] = 3;
-  user_problem.A.i[0] = 0;
-  user_problem.A.x[0] = 1.0;
-  user_problem.A.i[1] = 0;
-  user_problem.A.x[1] = 1.0;
-  user_problem.A.i[2] = 0;
-  user_problem.A.x[2] = 1.0;
+  user_problem.A.i[0]         = 0;
+  user_problem.A.x[0]         = 1.0;
+  user_problem.A.i[1]         = 0;
+  user_problem.A.x[1]         = 1.0;
+  user_problem.A.i[2]         = 0;
+  user_problem.A.x[2]         = 1.0;
   user_problem.lower.resize(n, 0.0);
   user_problem.upper.resize(n, dual_simplex::inf);
   user_problem.num_range_rows = 0;
   user_problem.problem_name   = "simple_cuts";
-  user_problem.obj_scale = 1.0;
-  user_problem.obj_constant = 0.0;
+  user_problem.obj_scale      = 1.0;
+  user_problem.obj_constant   = 0.0;
   user_problem.rhs.resize(m, 1.0);
   user_problem.row_sense.resize(m, 'E');
-  user_problem.var_types.resize(n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS);
+  user_problem.var_types.resize(
+    n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS);
 
   cuopt::init_logger_t logger("", true);
 
-  cuopt::linear_programming::dual_simplex::lp_problem_t<int, double> lp(user_problem.handle_ptr, 1, 1, 1);
+  cuopt::linear_programming::dual_simplex::lp_problem_t<int, double> lp(
+    user_problem.handle_ptr, 1, 1, 1);
   cuopt::linear_programming::dual_simplex::simplex_solver_settings_t<int, double> settings;
-  settings.barrier = false;
-  settings.barrier_presolve = false;
-  settings.log.log = true;
+  settings.barrier            = false;
+  settings.barrier_presolve   = false;
+  settings.log.log            = true;
   settings.log.log_to_console = true;
   settings.log.printf("Test print\n");
   std::vector<int> new_slacks;
   cuopt::linear_programming::dual_simplex::dualize_info_t<int, double> dualize_info;
-  cuopt::linear_programming::dual_simplex::convert_user_problem(user_problem, settings, lp, new_slacks, dualize_info);
-  cuopt::linear_programming::dual_simplex::lp_solution_t<int, double> solution(lp.num_rows, lp.num_cols);
+  cuopt::linear_programming::dual_simplex::convert_user_problem(
+    user_problem, settings, lp, new_slacks, dualize_info);
+  cuopt::linear_programming::dual_simplex::lp_solution_t<int, double> solution(lp.num_rows,
+                                                                               lp.num_cols);
   std::vector<cuopt::linear_programming::dual_simplex::variable_status_t> vstatus;
   std::vector<double> edge_norms;
   std::vector<int> basic_list(lp.num_rows);
   std::vector<int> nonbasic_list;
-  cuopt::linear_programming::dual_simplex::basis_update_mpf_t<int, double> basis_update(lp.num_cols, settings.refactor_frequency);
+  cuopt::linear_programming::dual_simplex::basis_update_mpf_t<int, double> basis_update(
+    lp.num_cols, settings.refactor_frequency);
   double start_time = dual_simplex::tic();
   printf("Calling solve linear program with advanced basis\n");
   EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve_linear_program_with_advanced_basis(
-              lp, start_time, settings, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms)),
+              lp,
+              start_time,
+              settings,
+              solution,
+              basis_update,
+              basic_list,
+              nonbasic_list,
+              vstatus,
+              edge_norms)),
             cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
@@ -400,34 +412,51 @@ TEST(dual_simplex, simple_cuts)
   EXPECT_NEAR(solution.objective, 1.0, 1e-6);
   EXPECT_NEAR(solution.x[0], 1.0, 1e-6);
 
-
   // Add a cut z >= 1/3. Needs to be in the form  C*x <= d
   csr_matrix_t<int, double> cuts(1, n, 1);
   cuts.row_start[0] = 0;
-  cuts.j[0] = 2;
-  cuts.x[0] = -1.0;
+  cuts.j[0]         = 2;
+  cuts.x[0]         = -1.0;
   cuts.row_start[1] = 1;
   printf("cuts m %d n %d\n", cuts.m, cuts.n);
   std::vector<double> cut_rhs(1);
   cut_rhs[0] = -1.0 / 3.0;
-  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(
-            start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms),
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time,
+                                                                                    settings,
+                                                                                    cuts,
+                                                                                    cut_rhs,
+                                                                                    lp,
+                                                                                    solution,
+                                                                                    basis_update,
+                                                                                    basic_list,
+                                                                                    nonbasic_list,
+                                                                                    vstatus,
+                                                                                    edge_norms),
             cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
   EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);
 
   cuts.row_start.resize(3);
-  cuts.j[0] = 1;
+  cuts.j[0]         = 1;
   cuts.row_start[2] = 2;
-  cuts.j[1] = 0;
-  cuts.x[1] = 1.0;
-  cuts.m = 2;
+  cuts.j[1]         = 0;
+  cuts.x[1]         = 1.0;
+  cuts.m            = 2;
   cut_rhs.resize(2);
   cut_rhs[1] = 0.0;
 
-  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(
-            start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms),
+  EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time,
+                                                                                    settings,
+                                                                                    cuts,
+                                                                                    cut_rhs,
+                                                                                    lp,
+                                                                                    solution,
+                                                                                    basis_update,
+                                                                                    basic_list,
+                                                                                    nonbasic_list,
+                                                                                    vstatus,
+                                                                                    edge_norms),
             cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);

From 74fff991b4ca0a47bf076b9edc767cdcb745c038 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 17 Nov 2025 14:17:05 -0800
Subject: [PATCH 03/27] Remove debugging

---
 cpp/src/dual_simplex/basis_updates.cpp |  8 -------
 cpp/src/dual_simplex/solve.cpp         | 30 ++++----------------------
 cpp/src/dual_simplex/sparse_matrix.cpp |  4 +---
 3 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 55a48c2ae..5c7834cda 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1118,7 +1118,6 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   // Solve for U^T W^T = C_B^T
   // We do this one row at a time of C_B
   csc_matrix_t<i_t, f_t> WT(m, cuts_basic.m, 0);
-  printf("Constructing WT\n");
 
   i_t WT_nz = 0;
   for (i_t k = 0; k < cuts_basic.m; k++) {
@@ -1133,8 +1132,6 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   }
   WT.col_start[cuts_basic.m] = WT_nz;
 
-  printf("Constructing V (num updates %d)\n", num_updates_);
-
   csc_matrix_t<i_t, f_t> V(cuts_basic.m, m, 0);
   if (num_updates_ > 0) {
     // W = V T_0 ... T_{num_updates_ - 1}
@@ -1189,7 +1186,6 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   // Adjust L and U
   // L = [ L0  0 ]
   //     [ V   I ]
-  printf("Adjusting L\n");
 
   i_t V_nz = V.col_start[m];
   i_t L_nz = L0_.col_start[m];
@@ -1225,7 +1221,6 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   // Adjust U
   // U = [ U0 0 ]
   //     [ 0  I ]
-  printf("Adjusting U\n");
 
   i_t U_nz = U0_.col_start[m];
   U0_.col_start.resize(m + cuts_basic.m + 1);
@@ -1241,11 +1236,9 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   U0_.n                           = m + cuts_basic.m;
   U0_.m                           = m + cuts_basic.m;
 
-  printf("Computing transposes\n");
   compute_transposes();
 
   // Adjust row_permutation_ and inverse_row_permutation_
-  printf("Adjusting row_permutation_ and inverse_row_permutation_\n");
   row_permutation_.resize(m + cuts_basic.m);
   inverse_row_permutation_.resize(m + cuts_basic.m);
   for (i_t k = m; k < m + cuts_basic.m; ++k) {
@@ -1254,7 +1247,6 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   inverse_permutation(row_permutation_, inverse_row_permutation_);
 
   // Adjust workspace sizes
-  printf("Adjusting workspace sizes\n");
   xi_workspace_.resize(2 * (m + cuts_basic.m), 0);
   x_workspace_.resize(m + cuts_basic.m, 0.0);
 
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 2754fa677..f8fbd66c1 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -315,23 +315,18 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
     return lp_status_t::NUMERICAL_ISSUES;
   }
-  printf("Number of cuts %d\n", p);
-  printf("Original lp rows %d\n", lp.num_rows);
-  printf("Original lp cols %d\n", lp.num_cols);
+  settings.log.printf("Number of cuts %d\n", p);
+  settings.log.printf("Original lp rows %d\n", lp.num_rows);
+  settings.log.printf("Original lp cols %d\n", lp.num_cols);
 
   csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
-  printf("Converting A to compressed row\n");
   lp.A.to_compressed_row(new_A_row);
 
-  printf("Appening cuts\n");
   new_A_row.append_rows(cuts);
 
-  printf("Converting back to compressed column\n");
   csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
   new_A_row.to_compressed_col(new_A_col);
-  printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
 
-  printf("Adding slacks\n");
   // Add in slacks variables for the new rows
   lp.lower.resize(lp.num_cols + p);
   lp.upper.resize(lp.num_cols + p);
@@ -352,28 +347,20 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   }
   new_A_col.col_start[lp.num_cols + p] = nz;
   new_A_col.n                          = lp.num_cols + p;
-  printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n);
-  printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]);
 
   lp.A         = new_A_col;
   i_t old_rows = lp.num_rows;
   lp.num_rows += p;
-  printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m);
   i_t old_cols = lp.num_cols;
   lp.num_cols += p;
-  printf("lp cols %d A cols %d\n", lp.num_cols, lp.A.n);
 
-  printf("New A matrix\n");
-  lp.A.print_matrix(stdout);
 
-  printf("Adding rhs\n");
   lp.rhs.resize(lp.num_rows);
   for (i_t k = old_rows; k < old_rows + p; k++) {
     const i_t h = k - old_rows;
     lp.rhs[k]   = cut_rhs[h];
   }
 
-  printf("Constructing column degree\n");
   // Construct C_B = C(:, basic_list)
   std::vector<i_t> C_col_degree(p, 0);
   i_t cuts_nz = cuts.row_start[p];
@@ -391,7 +378,6 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     C_B_nz += C_col_degree[j];
   }
 
-  printf("Constructing C_B\n");
   csr_matrix_t<i_t, f_t> C_B(num_basic, num_basic, C_B_nz);
   nz = 0;
   for (i_t i = 0; i < p; i++) {
@@ -408,9 +394,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   }
   C_B.row_start[p] = nz;
   settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
-  if (nz != C_B_nz) { exit(1); }
+  if (nz != C_B_nz) { return lp_status_t::NUMERICAL_ISSUES; }
 
-  printf("Adjusting basis update\n");
   // Adjust the basis update to include the new cuts
   basis_update.append_cuts(C_B);
 
@@ -425,13 +410,6 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   for (i_t j = old_rows; j < lp.num_rows; j++) {
     basic_list[j] = h++;
   }
-
-  printf("basic list\n");
-  for (i_t k = 0; k < basic_list.size(); k++) {
-    printf("%d ", basic_list[k]);
-  }
-  printf("\n");
-
   // Adjust the solution
   solution.x.resize(lp.num_cols, 0.0);
   solution.y.resize(lp.num_rows, 0.0);
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 1263fa63e..3e01c2f9c 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -364,14 +364,12 @@ i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
   const i_t n      = this->n;
   const i_t old_nz = this->row_start[old_m];
   const i_t C_row  = C.m;
-  if (0 && C.n != n) {
-    printf("C n %d != n %d\n", C.n, n);
+  if (C.n > n) {
     return -1;
   }
   const i_t C_nz   = C.row_start[C_row];
   const i_t new_nz = old_nz + C_nz;
   const i_t new_m  = old_m + C_row;
-  printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m);
 
   this->j.resize(new_nz);
   this->x.resize(new_nz);

From 18828927b62bab0c6fd649aba5ec7ea5b9960c91 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 24 Nov 2025 17:04:09 -0800
Subject: [PATCH 04/27] Fix issues in adding cuts. Add gomory cuts. Temporarily
 disable MIP heuristics and MIP presolve for experimentation

---
 cpp/src/dual_simplex/basis_updates.cpp    |  61 ++++
 cpp/src/dual_simplex/branch_and_bound.cpp | 410 ++++++++++++++++++++--
 cpp/src/dual_simplex/phase2.cpp           |  33 ++
 cpp/src/dual_simplex/solve.cpp            |  72 +++-
 cpp/src/dual_simplex/sparse_matrix.cpp    |  27 ++
 cpp/src/dual_simplex/sparse_matrix.hpp    |   4 +
 cpp/src/mip/solver.cu                     |  10 +-
 7 files changed, 575 insertions(+), 42 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 5c7834cda..2590c5226 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1132,6 +1132,27 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   }
   WT.col_start[cuts_basic.m] = WT_nz;
 
+
+#ifdef CHECK_W
+  {
+    for (i_t k = 0; k < cuts_basic.m; k++) {
+      std::vector<f_t> WT_col(m, 0.0);
+      WT.load_a_column(k, WT_col);
+      std::vector<f_t> CBT_col(m, 0.0);
+      matrix_transpose_vector_multiply(U0_, 1.0, WT_col, 0.0, CBT_col);
+      sparse_vector_t<i_t, f_t> CBT_col_sparse(cuts_basic, k);
+      std::vector<f_t> CBT_col_dense(m);
+      CBT_col_sparse.to_dense(CBT_col_dense);
+      for (i_t h = 0; h < m; h++) {
+        if (std::abs(CBT_col_dense[h] - CBT_col[h]) > 1e-6) {
+          printf("col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n", k, h, CBT_col_dense[h], h, CBT_col[h]);
+          exit(1);
+        }
+      }
+    }
+  }
+#endif
+
   csc_matrix_t<i_t, f_t> V(cuts_basic.m, m, 0);
   if (num_updates_ > 0) {
     // W = V T_0 ... T_{num_updates_ - 1}
@@ -1141,6 +1162,8 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
     // or
     // V(h, :) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h)
+    // So we can form V row by row in CSR and then covert it to CSC
+    // for appending to L0
 
     csr_matrix_t<i_t, f_t> V_row(cuts_basic.m, m, 0);
     i_t V_nz           = 0;
@@ -1175,6 +1198,39 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     V_row.row_start[cuts_basic.m] = V_nz;
 
     V_row.to_compressed_col(V);
+
+
+#ifdef CHECK_V
+    csc_matrix_t<i_t, f_t> CB_col(cuts_basic.m, m, 0);
+    cuts_basic.to_compressed_col(CB_col);
+    for (i_t k = 0; k < m; k++) {
+      std::vector<f_t> U_col(m, 0.0);
+      U0_.load_a_column(k, U_col);
+      for (i_t h = num_updates_ - 1; h >= 0; --h) {
+        // T_h = ( I + u_h v_h^T)
+        // T_h * x = x + u_h * v_h^T * x = x + theta * u_h
+        const i_t u_col = 2 * h;
+        const i_t v_col = 2 * h + 1;
+        f_t theta = dot_product(v_col, U_col);
+        const i_t col_start = S_.col_start[u_col];
+        const i_t col_end = S_.col_start[u_col + 1];
+        for (i_t p = col_start; p < col_end; ++p) {
+          const i_t i = S_.i[p];
+          U_col[i] += theta * S_.x[p];
+        }
+      }
+      std::vector<f_t> CB_column(cuts_basic.m, 0.0);
+      matrix_vector_multiply(V, 1.0, U_col, 0.0, CB_column);
+      std::vector<f_t> CB_col_dense(cuts_basic.m);
+      CB_col.load_a_column(k, CB_col_dense);
+      for (i_t l = 0; l < cuts_basic.m; l++) {
+        if (std::abs(CB_col_dense[l] - CB_column[l]) > 1e-6) {
+          printf("col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n", k, l, CB_col_dense[l], l, CB_column[l]);
+          exit(1);
+        }
+      }
+    }
+#endif
   } else {
     // W = V
     WT.transpose(V);
@@ -1190,6 +1246,7 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
   i_t V_nz = V.col_start[m];
   i_t L_nz = L0_.col_start[m];
   csc_matrix_t<i_t, f_t> new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m);
+  i_t predicted_nz = L_nz + V_nz + cuts_basic.m;
   L_nz = 0;
   for (i_t j = 0; j < m; ++j) {
     new_L.col_start[j]  = L_nz;
@@ -1215,6 +1272,10 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     L_nz++;
   }
   new_L.col_start[m + cuts_basic.m] = L_nz;
+  if (L_nz != predicted_nz) {
+    printf("L_nz %d predicted_nz %d\n", L_nz, predicted_nz);
+    exit(1);
+  }
 
   L0_ = new_L;
 
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 9f207b6a6..aed49ad4f 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -8,6 +8,7 @@
 #include <omp.h>
 #include <algorithm>
 #include <dual_simplex/branch_and_bound.hpp>
+#include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/logger.hpp>
 #include <dual_simplex/mip_node.hpp>
@@ -206,6 +207,10 @@ branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
 {
   stats_.start_time = tic();
   dualize_info_t<i_t, f_t> dualize_info;
+#ifdef PRINT_A
+  settings_.log.printf("A");
+  original_problem_.A.print_matrix();
+#endif
   convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info);
   full_variable_types(original_problem_, original_lp_, var_types_);
 
@@ -1062,8 +1067,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   settings_.log.printf("Solving LP root relaxation\n");
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.inside_mip                = 1;
-  lp_status_t root_status               = solve_linear_program_advanced(
-    original_lp_, stats_.start_time, lp_settings, root_relax_soln_, root_vstatus_, edge_norms_);
+  lp_settings.scale_columns = false;
+  std::vector<i_t> basic_list(original_lp_.num_rows);
+  std::vector<i_t> nonbasic_list;
+  basis_update_mpf_t<i_t, f_t> basis_update(original_lp_.num_rows, settings_.refactor_frequency);
+  lp_status_t root_status               = solve_linear_program_with_advanced_basis(
+    original_lp_, stats_.start_time, lp_settings, root_relax_soln_, basis_update, basic_list, nonbasic_list, root_vstatus_, edge_norms_);
   stats_.total_lp_iters      = root_relax_soln_.iterations;
   stats_.total_lp_solve_time = toc(stats_.start_time);
   if (root_status == lp_status_t::INFEASIBLE) {
@@ -1111,31 +1120,384 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   }
 
   std::vector<i_t> fractional;
-  const i_t num_fractional =
+  i_t num_fractional =
     fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
 
-  if (num_fractional == 0) {
-    mutex_upper_.lock();
-    incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
-    upper_bound_ = root_objective_;
-    mutex_upper_.unlock();
-    // We should be done here
-    uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
-    solution.objective          = incumbent_.objective;
-    solution.lower_bound        = root_objective_;
-    solution.nodes_explored     = 0;
-    solution.simplex_iterations = root_relax_soln_.iterations;
-    settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n",
-                         compute_user_objective(original_lp_, root_objective_),
-                         toc(stats_.start_time));
+  csc_matrix_t<i_t, f_t> Arow(1, 1, 1);
+  original_lp_.A.transpose(Arow);
 
-    if (settings_.solution_callback != nullptr) {
-      settings_.solution_callback(solution.x, solution.objective);
-    }
-    if (settings_.heuristic_preemption_callback != nullptr) {
-      settings_.heuristic_preemption_callback();
+  for (i_t cut_pass = 0; cut_pass < 10; cut_pass++) {
+    if (num_fractional == 0) {
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (var_types_[j] == variable_type_t::INTEGER) {
+          settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]);
+        }
+      }
+      mutex_upper_.lock();
+      incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
+      upper_bound_ = root_objective_;
+      mutex_upper_.unlock();
+      // We should be done here
+      uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
+      solution.objective          = incumbent_.objective;
+      solution.lower_bound        = root_objective_;
+      solution.nodes_explored     = 0;
+      solution.simplex_iterations = root_relax_soln_.iterations;
+      settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n",
+                           compute_user_objective(original_lp_, root_objective_),
+                           toc(stats_.start_time));
+
+      if (settings_.solution_callback != nullptr) {
+        settings_.solution_callback(solution.x, solution.objective);
+      }
+      if (settings_.heuristic_preemption_callback != nullptr) {
+        settings_.heuristic_preemption_callback();
+      }
+      return mip_status_t::OPTIMAL;
+    } else {
+      settings_.log.printf("Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass);
+      for (i_t j: fractional) {
+        settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]);
+      }
+      // Let's look for cuts
+      // Compute b_bar
+      std::vector<f_t> b_bar(original_lp_.num_rows);
+      basis_update.b_solve(original_lp_.rhs, b_bar);
+
+      std::vector<f_t> nonbasic_mark(original_lp_.num_cols, 0);
+      for (i_t j : nonbasic_list) {
+        nonbasic_mark[j] = 1;
+      }
+
+      std::vector<f_t> x_workspace(original_lp_.num_cols, 0.0);
+      std::vector<i_t> x_mark(original_lp_.num_cols, 0);
+
+      std::vector<i_t> abar_indices;
+      abar_indices.reserve(original_lp_.num_cols);
+
+      std::vector<i_t> has_lower(original_lp_.num_cols, 0);
+      std::vector<i_t> has_upper(original_lp_.num_cols, 0);
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (original_lp_.lower[j] < 0) {
+          settings_.log.printf(
+            "Variable %d has negative lower bound %e\n", j, original_lp_.lower[j]);
+          exit(1);
+        }
+        const f_t uj      = original_lp_.upper[j];
+        const f_t lj      = original_lp_.lower[j];
+        const f_t xstar_j = root_relax_soln_.x[j];
+        if (uj < inf) {
+          if (uj - xstar_j <= xstar_j - lj) {
+            has_upper[j] = 1;
+            //settings_.log.printf("Variable %d in upper\n", j);
+          } else {
+            has_lower[j] = 1;
+            //settings_.log.printf("Variable %d in lower\n", j);
+          }
+          continue;
+        }
+
+        if (lj > -inf) {
+          has_lower[j] = 1;
+          //settings_.log.printf("Variable %d in lower\n", j);
+        }
+      }
+
+      csr_matrix_t<i_t, f_t> C(0, original_lp_.num_cols, 0);
+      C.row_start[0] = 0;
+      std::vector<f_t> cut_rhs;
+
+      for (i_t i = 0; i < original_lp_.num_rows; i++) {
+        const i_t j = basic_list[i];
+        //settings_.log.printf(
+        //  "Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]);
+        if (var_types_[j] != variable_type_t::INTEGER) { continue; }
+        const f_t x_j = root_relax_soln_.x[j];
+        if (std::abs(x_j - std::round(x_j)) < settings_.integer_tol) { continue; }
+
+        settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
+#ifdef PRINT_BASIS
+        for (i_t h = 0; h < basic_list.size(); h++) {
+          settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+        }
+#endif
+
+        // Solve B^T u_bar = e_i
+        sparse_vector_t<i_t, f_t> e_i(original_lp_.num_rows, 1);
+        e_i.i[0] = i;
+        e_i.x[0] = 1.0;
+        sparse_vector_t<i_t, f_t> u_bar(original_lp_.num_rows, 0);
+        basis_update.b_transpose_solve(e_i, u_bar);
+
+        std::vector<f_t> u_bar_dense(original_lp_.num_rows);
+        u_bar.to_dense(u_bar_dense);
+
+        std::vector<f_t> BTu_bar(original_lp_.num_rows);
+        b_transpose_multiply(original_lp_, basic_list, u_bar_dense, BTu_bar);
+        for (i_t k = 0; k < original_lp_.num_rows; k++) {
+          if (k == i) {
+            if (std::abs(BTu_bar[k] - 1.0) > 1e-6) {
+              settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+              exit(1);
+            }
+          } else {
+            if (std::abs(BTu_bar[k]) > 1e-6) {
+              settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+              exit(1);
+            }
+          }
+        }
+
+        // Compute a_bar = N^T u_bar
+        const i_t nz_ubar = u_bar.i.size();
+        for (i_t k = 0; k < nz_ubar; k++) {
+          const i_t ii        = u_bar.i[k];
+          const f_t u_bar_i   = u_bar.x[k];
+          const i_t row_start = Arow.col_start[ii];
+          const i_t row_end   = Arow.col_start[ii + 1];
+          for (i_t p = row_start; p < row_end; p++) {
+            const i_t jj = Arow.i[p];
+            if (nonbasic_mark[jj] == 1) {
+              x_workspace[jj] += u_bar_i * Arow.x[p];
+              if (!x_mark[jj]) {
+                x_mark[jj] = 1;
+                abar_indices.push_back(jj);
+              }
+            }
+          }
+        }
+
+        sparse_vector_t<i_t, f_t> a_bar(original_lp_.num_cols, abar_indices.size() + 1);
+        for (i_t k = 0; k < abar_indices.size(); k++) {
+          const i_t jj = abar_indices[k];
+          a_bar.i[k]   = jj;
+          a_bar.x[k]   = x_workspace[jj];
+        }
+
+        // Clear the workspace
+        for (i_t jj : abar_indices) {
+          x_workspace[jj] = 0.0;
+          x_mark[jj]      = 0;
+        }
+        abar_indices.clear();
+
+        // We should now have the base inequality
+        // x_j + a_bar^T x_N >= b_bar_i
+        // We add x_j into a_bar so that everything is in a single sparse_vector_t
+        a_bar.i[a_bar.i.size() - 1] = j;
+        a_bar.x[a_bar.x.size() - 1] = 1.0;
+
+        std::vector<f_t> a_bar_dense(original_lp_.num_cols);
+        a_bar.to_dense(a_bar_dense);
+
+        f_t a_bar_dense_dot = dot<i_t, f_t>(a_bar_dense, root_relax_soln_.x);
+        settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]);
+
+        settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
+
+        // Print out the base inequality
+        for (i_t k = 0; k < a_bar.i.size(); k++) {
+          const i_t jj = a_bar.i[k];
+          const f_t aj = a_bar.x[k];
+          settings_.log.printf("a_bar[%d] = %e\n", k, aj);
+        }
+        settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]);
+
+        auto f = [](f_t q_1, f_t q_2) -> f_t {
+          f_t q_1_hat = q_1 - std::floor(q_1);
+          f_t q_2_hat = q_2 - std::floor(q_2);
+          return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+        };
+
+        auto h = [](f_t q) -> f_t { return std::max(q, 0.0); };
+
+        f_t R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]);
+        std::vector<i_t> cut_indices;
+        cut_indices.reserve(a_bar.i.size());
+        for (i_t k = 0; k < a_bar.i.size(); k++) {
+          const i_t jj = a_bar.i[k];
+          f_t aj       = a_bar.x[k];
+          if (var_types_[jj] == variable_type_t::INTEGER) {
+            x_workspace[jj] += f(aj, b_bar[i]);
+            if (!x_mark[jj]) {
+              x_mark[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          } else {
+            x_workspace[jj] += h(aj);
+            if (!x_mark[jj]) {
+              x_mark[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          }
+        }
+
+#ifdef CMIR
+        // Compute r
+        f_t r = b_bar[i];
+        for (i_t k = 0; k < a_bar.i.size(); k++) {
+          const i_t jj = a_bar.i[k];
+          if (has_upper[jj]) {
+            const f_t uj = original_lp_.upper[jj];
+            r -= uj * a_bar.x[k];
+            continue;
+          }
+          if (has_lower[jj]) {
+            const f_t lj = original_lp_.lower[jj];
+            r -= lj * a_bar.x[k];
+          }
+        }
+
+        // Compute R
+        f_t R = std::ceil(r) * (r - std::floor(r));
+        for (i_t k = 0; k < a_bar.i.size(); k++) {
+          const i_t jj = a_bar.i[k];
+          const f_t aj = a_bar.x[k];
+          if (has_upper[jj]) {
+            const f_t uj = original_lp_.upper[jj];
+            if (var_types_[jj] == variable_type_t::INTEGER) {
+              R -= f(-aj, r) * uj;
+            } else {
+              R -= h(-aj) * uj;
+            }
+          } else if (has_lower[jj]) {
+            const f_t lj = original_lp_.lower[jj];
+            if (var_types_[jj] == variable_type_t::INTEGER) {
+              R += f(aj, r) * lj;
+            } else {
+              R += h(aj) * lj;
+            }
+          }
+        }
+
+        // Compute the cut coefficients
+        std::vector<i_t> cut_indices;
+        cut_indices.reserve(a_bar.i.size());
+        for (i_t k = 0; k < a_bar.i.size(); k++) {
+          const i_t jj = a_bar.i[k];
+          const f_t aj = a_bar.x[k];
+          if (has_upper[jj]) {
+            if (var_types_[jj] == variable_type_t::INTEGER) {
+              // Upper intersect I
+              x_workspace[jj] -= f(-aj, r);
+              if (!x_mark[jj]) {
+                x_mark[jj] = 1;
+                cut_indices.push_back(jj);
+              }
+            } else {
+              // Upper intersect C
+              x_workspace[jj] -= h(-aj);
+              if (!x_mark[jj]) {
+                x_mark[jj] = 1;
+                cut_indices.push_back(jj);
+              }
+            }
+          } else if (var_types_[jj] == variable_type_t::INTEGER) {
+            // I \ Upper
+            x_workspace[jj] -= f(aj, r);
+            if (!x_mark[jj]) {
+              x_mark[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          } else {
+            // C \ Upper
+            x_workspace[jj] += h(aj);
+            if (!x_mark[jj]) {
+              x_mark[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          }
+        }
+#endif
+
+        sparse_vector_t<i_t, f_t> cut(original_lp_.num_cols, cut_indices.size());
+        for (i_t k = 0; k < cut_indices.size(); k++) {
+          const i_t jj = cut_indices[k];
+          cut.i[k]     = jj;
+          cut.x[k]     = x_workspace[jj];
+        }
+
+        // Clear the workspace
+        for (i_t jj : cut_indices) {
+          x_workspace[jj] = 0.0;
+          x_mark[jj]      = 0;
+        }
+
+        // Sort the coefficients by their index
+        cut.sort();
+        // The new cut is: g'*x >= R
+        // But we want to have it in the form h'*x <= b
+        for (i_t k = 0; k < cut.x.size(); k++) {
+          cut.x[k] *= -1.0;
+        }
+
+        C.append_row(cut);
+        cut_rhs.push_back(-R);
+      }
+
+      csc_matrix_t<i_t, f_t> C_col(C.m, C.n, 0);
+      C.to_compressed_col(C_col);
+
+#ifdef PRINT_CUTS
+      C_col.print_matrix();
+#endif
+
+      C.check_matrix();
+#ifdef PRINT_CUT_RHS
+      for (i_t k = 0; k < cut_rhs.size(); k++) {
+        lp_settings.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]);
+      }
+#endif
+
+      // cut_rhs.size() is a size_t; cast it to i_t so the argument matches the %d conversion
+      lp_settings.log.printf("C nz %d\n", C.row_start[C.m]);
+      lp_settings.log.printf("C m %d cut rhs size %d\n", C.m, static_cast<i_t>(cut_rhs.size()));
+      lp_settings.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
+#ifdef PRINT_OPTIMAL
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        lp_settings.log.printf("x[%d] = %e\n", j, root_relax_soln_.x[j]);
+      }
+#endif
+
+      // Check to see that this is a cut i.e C*x > d
+      std::vector<f_t> Cx(C.m);
+      matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx);
+      for (i_t k = 0; k < Cx.size(); k++) {
+        //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]);
+        if (Cx[k] <= cut_rhs[k]) {
+          lp_settings.log.printf("C*x <= d for cut %d\n", k);
+          exit(1);
+        }
+      }
+
+      // Resolve the LP with the new cuts
+      lp_settings.log.printf("Solving LP with %d cuts\n", C.m);
+
+      lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time,
+                                                              lp_settings,
+                                                              C,
+                                                              cut_rhs,
+                                                              original_lp_,
+                                                              root_relax_soln_,
+                                                              basis_update,
+                                                              basic_list,
+                                                              nonbasic_list,
+                                                              root_vstatus_,
+                                                              edge_norms_);
+
+      root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
+
+      if (cut_status != lp_status_t::OPTIMAL) {
+        lp_settings.log.printf("Cut status %d\n", cut_status);
+        exit(1);
+      }
+
+      original_lp_.A.transpose(Arow);
+      var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS);
+
+      fractional.clear();
+      num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
     }
-    return mip_status_t::OPTIMAL;
   }
 
   pc_.resize(original_lp_.num_cols);
@@ -1177,8 +1539,6 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     " | Explored | Unexplored |    Objective    |     Bound     | Depth | Iter/Node |   Gap    "
     "|  Time  |\n");
 
-  csc_matrix_t<i_t, f_t> Arow(1, 1, 1);
-  original_lp_.A.transpose(Arow);
 
   stats_.nodes_explored       = 0;
   stats_.nodes_unexplored     = 2;
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 39ea9b465..2ff075c15 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -2397,6 +2397,39 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     }
     timers.pricing_time += timers.stop_timer();
     if (leaving_index == -1) {
+
+
+#ifdef CHECK_BASIS_UPDATE
+      for (i_t k = 0; k < basic_list.size(); k++) {
+        const i_t jj = basic_list[k];
+        sparse_vector_t<i_t, f_t> ei_sparse(m, 1);
+        ei_sparse.i[0] = k;
+        ei_sparse.x[0] = 1.0;
+        sparse_vector_t<i_t, f_t> ubar_sparse(m, 0);
+        ft.b_transpose_solve(ei_sparse, ubar_sparse);
+        std::vector<f_t> ubar_dense(m);
+        ubar_sparse.to_dense(ubar_dense);
+        std::vector<f_t> BTu_dense(m);
+        b_transpose_multiply(lp, basic_list, ubar_dense, BTu_dense);
+        for (i_t l = 0; l < m; l++) {
+          if (l != k) {
+              settings.log.printf("BTu_dense[%d] = %e i %d\n", l, BTu_dense[l], k);
+          } else {
+              settings.log.printf("BTu_dense[%d] = %e != 1.0 i %d\n", l, BTu_dense[l], k);
+          }
+        }
+        for (i_t h = 0; h < m; h++) {
+          settings.log.printf("i %d ubar_dense[%d] = %.16e\n", k, h, ubar_dense[h]);
+        }
+      }
+      settings.log.printf("ft.num_updates() %d\n", ft.num_updates());
+      for (i_t h = 0; h < m; h++) {
+        settings.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+      }
+
+#endif
+
+
       phase2::prepare_optimality(lp,
                                  settings,
                                  ft,
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index f8fbd66c1..8874bf420 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -8,6 +8,7 @@
 #include <dual_simplex/solve.hpp>
 
 #include <dual_simplex/barrier.hpp>
+#include <dual_simplex/basis_solves.hpp>
 #include <dual_simplex/branch_and_bound.hpp>
 #include <dual_simplex/crossover.hpp>
 #include <dual_simplex/initial_basis.hpp>
@@ -310,6 +311,21 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   // by the current solution x* (i.e. C*x* > d), this function
   // adds the cuts into the LP and solves again.
 
+
+  {
+    csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+    basis_update.multiply_lu(Btest);
+    csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+    form_b(lp.A, basic_list, B);
+    csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+    add(Btest, B, 1.0, -1.0, Diff);
+    const f_t err = Diff.norm1();
+    settings.log.printf("Before || B - L*U || %e\n", err);
+    if (err > 1e-6) {
+      exit(1);
+    }
+  }
+
   const i_t p = cuts.m;
   if (cut_rhs.size() != static_cast<size_t>(p)) {
     settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
@@ -362,23 +378,31 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   }
 
   // Construct C_B = C(:, basic_list)
-  std::vector<i_t> C_col_degree(p, 0);
+  std::vector<i_t> C_col_degree(lp.num_cols, 0);
   i_t cuts_nz = cuts.row_start[p];
   for (i_t q = 0; q < cuts_nz; q++) {
     const i_t j = cuts.j[q];
+    if (j < 0 || j >= lp.num_cols) {  // C_col_degree has lp.num_cols entries
+      settings.log.printf("cut column index %d is outside [0, %d)\n", j, lp.num_cols);
+      exit(1);
+    }
     C_col_degree[j]++;
   }
 
-  std::vector<i_t> in_basis(old_cols, 0);
+  std::vector<i_t> in_basis(old_cols, -1);
   const i_t num_basic = static_cast<i_t>(basic_list.size());
   i_t C_B_nz          = 0;
   for (i_t k = 0; k < num_basic; k++) {
     const i_t j = basic_list[k];
-    in_basis[j] = 1;
-    C_B_nz += C_col_degree[j];
+    in_basis[j] = k;
+    if (j < p)
+    {
+      C_B_nz += C_col_degree[j];
+    }
   }
+  settings.log.printf("Done estimating C_B_nz\n");
 
-  csr_matrix_t<i_t, f_t> C_B(num_basic, num_basic, C_B_nz);
+  csr_matrix_t<i_t, f_t> C_B(p, num_basic, C_B_nz);
   nz = 0;
   for (i_t i = 0; i < p; i++) {
     C_B.row_start[i]    = nz;
@@ -386,30 +410,49 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     const i_t row_end   = cuts.row_start[i + 1];
     for (i_t q = row_start; q < row_end; q++) {
       const i_t j = cuts.j[q];
-      if (in_basis[j] == 0) { continue; }
-      C_B.j[nz] = j;
+      const i_t j_basis = in_basis[j];
+      if (j_basis == -1) { continue; }
+      C_B.j[nz] = j_basis;
       C_B.x[nz] = cuts.x[q];
       nz++;
     }
   }
   C_B.row_start[p] = nz;
   settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
-  if (nz != C_B_nz) { return lp_status_t::NUMERICAL_ISSUES; }
+  if (nz != C_B_nz) { exit(1); return lp_status_t::NUMERICAL_ISSUES; }
+  settings.log.printf("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz);
+
 
   // Adjust the basis update to include the new cuts
   basis_update.append_cuts(C_B);
 
+  basic_list.resize(lp.num_rows, 0);
+  i_t h = old_cols;
+  for (i_t j = old_rows; j < lp.num_rows; j++) {
+    basic_list[j] = h++;
+  }
+
+  // Check the basis update
+  csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+  basis_update.multiply_lu(Btest);
+
+  csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+  form_b(lp.A, basic_list, B);
+
+  csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+  add(Btest, B, 1.0, -1.0, Diff);
+  const f_t err = Diff.norm1();
+  settings.log.printf("After || B - L*U || %e\n", err);
+  if (err > 1e-6) {
+    Diff.print_matrix();
+      exit(1);
+  }
   // Adjust the vstatus
   vstatus.resize(lp.num_cols);
   for (i_t j = old_cols; j < lp.num_cols; j++) {
     vstatus[j] = variable_status_t::BASIC;
   }
 
-  basic_list.resize(lp.num_rows, 0);
-  i_t h = old_cols;
-  for (i_t j = old_rows; j < lp.num_rows; j++) {
-    basic_list[j] = h++;
-  }
   // Adjust the solution
   solution.x.resize(lp.num_cols, 0.0);
   solution.y.resize(lp.num_rows, 0.0);
@@ -418,9 +461,10 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   // For now just clear the edge norms
   edge_norms.clear();
   i_t iter              = 0;
+  bool initialize_basis = false;
   dual::status_t status = dual_phase2_with_advanced_basis(2,
                                                           0,
-                                                          false,
+                                                          initialize_basis,
                                                           start_time,
                                                           lp,
                                                           settings,
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 3e01c2f9c..0da4f90e9 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -399,6 +399,33 @@ i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
   return 0;
 }
 
+// Appends the entries of sparse vector c as a new last row; always returns 0.
+template <typename i_t, typename f_t>
+i_t csr_matrix_t<i_t, f_t>::append_row(const sparse_vector_t<i_t, f_t>& c)
+{
+  const i_t old_m  = this->m;
+  const i_t old_nz = this->row_start[old_m];
+  const i_t c_nz   = static_cast<i_t>(c.i.size());
+  const i_t new_nz = old_nz + c_nz;
+  const i_t new_m  = old_m + 1;
+
+  // Size storage to exactly new_nz and add the row_start entry closing the new row
+  this->j.resize(new_nz);
+  this->x.resize(new_nz);
+  this->row_start.resize(new_m + 1);
+  this->row_start[new_m] = new_nz;
+
+  // Copy c's indices/values verbatim (no sorting, merging, or range checks)
+  for (i_t k = 0; k < c_nz; k++) {
+    this->j[old_nz + k] = c.i[k];
+    this->x[old_nz + k] = c.x[k];
+  }
+
+  this->m      = new_m;
+  this->nz_max = new_nz;
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 void csc_matrix_t<i_t, f_t>::print_matrix(FILE* fid) const
 {
diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp
index eefc31b1f..49c5c185a 100644
--- a/cpp/src/dual_simplex/sparse_matrix.hpp
+++ b/cpp/src/dual_simplex/sparse_matrix.hpp
@@ -136,8 +136,12 @@ class csr_matrix_t {
   // Create a new matrix with the marked rows removed
   i_t remove_rows(std::vector<i_t>& row_marker, csr_matrix_t<i_t, f_t>& Aout) const;
 
+  // Append rows from another CSR matrix
   i_t append_rows(const csr_matrix_t<i_t, f_t>& C);
 
+  // Append a row from a sparse vector
+  i_t append_row(const sparse_vector_t<i_t, f_t>& c);
+
   // Ensures no repeated column indices within a row
   void check_matrix() const;
 
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index 28659cccd..e8978e17e 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -109,7 +109,8 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
 
   diversity_manager_t<i_t, f_t> dm(context);
   dm.timer              = timer_;
-  bool presolve_success = dm.run_presolve(timer_.remaining_time());
+  //bool presolve_success = dm.run_presolve(timer_.remaining_time());
+  bool presolve_success = true;
   if (!presolve_success) {
     CUOPT_LOG_INFO("Problem proven infeasible in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
@@ -117,7 +118,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     context.problem_ptr->post_process_solution(sol);
     return sol;
   }
-  if (context.problem_ptr->empty) {
+  if (0 && context.problem_ptr->empty) {
     CUOPT_LOG_INFO("Problem full reduced in presolve");
     solution_t<i_t, f_t> sol(*context.problem_ptr);
     sol.set_problem_fully_reduced();
@@ -126,7 +127,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
   }
 
   // if the problem was reduced to a LP: run concurrent LP
-  if (context.problem_ptr->n_integer_vars == 0) {
+  if (0 && context.problem_ptr->n_integer_vars == 0) {
     CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP");
     pdlp_solver_settings_t<i_t, f_t> settings{};
     settings.time_limit = timer_.remaining_time();
@@ -224,6 +225,9 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
                                                 std::ref(branch_and_bound_solution));
   }
 
+  auto bb_status = branch_and_bound_status_future.get();
+  exit(1);
+
   // Start the primal heuristics
   auto sol = dm.run_solver();
   if (!context.settings.heuristics_only) {

From 96ed3864b2595b2d837b403c6c63147e164bce6a Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 24 Nov 2025 17:13:19 -0800
Subject: [PATCH 05/27] Fix unit test

---
 cpp/tests/dual_simplex/unit_tests/solve.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index 41a3a8e62..95623f4b2 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -438,8 +438,12 @@ TEST(dual_simplex, simple_cuts)
   EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);
 
   cuts.row_start.resize(3);
+  cuts.j.resize(2);
+  cuts.x.resize(2);
+  // Add cut y >= 1/3
   cuts.j[0]         = 1;
   cuts.row_start[2] = 2;
+  // Add cut x <= 0.0
   cuts.j[1]         = 0;
   cuts.x[1]         = 1.0;
   cuts.m            = 2;

From 6ff7952f4772ff577c0f220475119e95b2f15198 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 24 Nov 2025 17:20:28 -0800
Subject: [PATCH 06/27] Fix issue when computing nonzeros in C_B

---
 cpp/src/dual_simplex/solve.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 8874bf420..3fba1a5ac 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -395,7 +395,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   for (i_t k = 0; k < num_basic; k++) {
     const i_t j = basic_list[k];
     in_basis[j] = k;
-    if (j < p)
+    if (j < cuts.n)
     {
       C_B_nz += C_col_degree[j];
     }

From 20b5777156b48c55fa598fbd9d48d449987c62ea Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 24 Nov 2025 17:26:01 -0800
Subject: [PATCH 07/27] Check solution values at end of unit test

---
 cpp/tests/dual_simplex/unit_tests/solve.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index 95623f4b2..cd66e63f1 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -465,6 +465,10 @@ TEST(dual_simplex, simple_cuts)
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
   EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6);
+  EXPECT_NEAR(solution.x[0], 0.0, 1e-6);
+  EXPECT_NEAR(solution.x[1], 2.0 / 3.0, 1e-6);
+  EXPECT_NEAR(solution.x[2], 1.0 / 3.0, 1e-6);
+
 }
 
 }  // namespace cuopt::linear_programming::dual_simplex::test

From ca571a04401a505e155d2ab169fd51dd69186937 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 25 Nov 2025 13:13:20 -0800
Subject: [PATCH 08/27] Enable c-MIR cuts

---
 cpp/src/dual_simplex/branch_and_bound.cpp | 173 ++++++++++++----------
 1 file changed, 91 insertions(+), 82 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index aed49ad4f..f9c323133 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1159,7 +1159,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       for (i_t j: fractional) {
         settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]);
       }
-      // Let's look for cuts
+      // Let's look for Gomory cuts
       // Compute b_bar
       std::vector<f_t> b_bar(original_lp_.num_rows);
       basis_update.b_solve(original_lp_.rhs, b_bar);
@@ -1177,6 +1177,8 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       std::vector<i_t> has_lower(original_lp_.num_cols, 0);
       std::vector<i_t> has_upper(original_lp_.num_cols, 0);
+
+      bool needs_complement = false;
       for (i_t j = 0; j < original_lp_.num_cols; j++) {
         if (original_lp_.lower[j] < 0) {
           settings_.log.printf(
@@ -1185,23 +1187,27 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         }
         const f_t uj      = original_lp_.upper[j];
         const f_t lj      = original_lp_.lower[j];
+        if (uj != inf || lj != 0.0) {
+          needs_complement = true;
+        }
         const f_t xstar_j = root_relax_soln_.x[j];
         if (uj < inf) {
           if (uj - xstar_j <= xstar_j - lj) {
             has_upper[j] = 1;
-            //settings_.log.printf("Variable %d in upper\n", j);
+            settings_.log.printf("Variable %d in upper\n", j);
           } else {
             has_lower[j] = 1;
-            //settings_.log.printf("Variable %d in lower\n", j);
+            settings_.log.printf("Variable %d in lower\n", j);
           }
           continue;
         }
 
         if (lj > -inf) {
           has_lower[j] = 1;
-          //settings_.log.printf("Variable %d in lower\n", j);
+          settings_.log.printf("Variable %d in lower\n", j);
         }
       }
+      settings_.log.printf("needs_complement %d\n", needs_complement);
 
       csr_matrix_t<i_t, f_t> C(0, original_lp_.num_cols, 0);
       C.row_start[0] = 0;
@@ -1249,6 +1255,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         }
 
         // Compute a_bar = N^T u_bar
+        // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused.
         const i_t nz_ubar = u_bar.i.size();
         for (i_t k = 0; k < nz_ubar; k++) {
           const i_t ii        = u_bar.i[k];
@@ -1295,6 +1302,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
         settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
 
+#ifdef PRINT_BASE_INEQUALITY
         // Print out the base inequality
         for (i_t k = 0; k < a_bar.i.size(); k++) {
           const i_t jj = a_bar.i[k];
@@ -1302,6 +1310,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
           settings_.log.printf("a_bar[%d] = %e\n", k, aj);
         }
         settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]);
+#endif
 
         auto f = [](f_t q_1, f_t q_2) -> f_t {
           f_t q_1_hat = q_1 - std::floor(q_1);
@@ -1311,104 +1320,105 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
         auto h = [](f_t q) -> f_t { return std::max(q, 0.0); };
 
-        f_t R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]);
+
         std::vector<i_t> cut_indices;
         cut_indices.reserve(a_bar.i.size());
-        for (i_t k = 0; k < a_bar.i.size(); k++) {
-          const i_t jj = a_bar.i[k];
-          f_t aj       = a_bar.x[k];
-          if (var_types_[jj] == variable_type_t::INTEGER) {
-            x_workspace[jj] += f(aj, b_bar[i]);
-            if (!x_mark[jj]) {
-              x_mark[jj] = 1;
-              cut_indices.push_back(jj);
-            }
-          } else {
-            x_workspace[jj] += h(aj);
-            if (!x_mark[jj]) {
-              x_mark[jj] = 1;
-              cut_indices.push_back(jj);
-            }
-          }
-        }
+        f_t R;
+        if (!needs_complement) {
+          R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]);
 
-#ifdef CMIR
-        // Compute r
-        f_t r = b_bar[i];
-        for (i_t k = 0; k < a_bar.i.size(); k++) {
-          const i_t jj = a_bar.i[k];
-          if (has_upper[jj]) {
-            const f_t uj = original_lp_.upper[jj];
-            r -= uj * a_bar.x[k];
-            continue;
-          }
-          if (has_lower[jj]) {
-            const f_t lj = original_lp_.lower[jj];
-            r -= lj * a_bar.x[k];
-          }
-        }
-
-        // Compute R
-        f_t R = std::ceil(r) * (r - std::floor(r));
-        for (i_t k = 0; k < a_bar.i.size(); k++) {
-          const i_t jj = a_bar.i[k];
-          const f_t aj = a_bar.x[k];
-          if (has_upper[jj]) {
-            const f_t uj = original_lp_.upper[jj];
+          for (i_t k = 0; k < a_bar.i.size(); k++) {
+            const i_t jj = a_bar.i[k];
+            f_t aj       = a_bar.x[k];
             if (var_types_[jj] == variable_type_t::INTEGER) {
-              R -= f(-aj, r) * uj;
+              x_workspace[jj] += f(aj, b_bar[i]);
+              if (!x_mark[jj]) {
+                x_mark[jj] = 1;
+                cut_indices.push_back(jj);
+              }
             } else {
-              R -= h(-aj) * uj;
+              x_workspace[jj] += h(aj);
+              if (!x_mark[jj]) {
+                x_mark[jj] = 1;
+                cut_indices.push_back(jj);
+              }
             }
-          } else if (has_lower[jj]) {
-            const f_t lj = original_lp_.lower[jj];
-            if (var_types_[jj] == variable_type_t::INTEGER) {
-              R += f(aj, r) * lj;
-            } else {
-              R += h(aj) * lj;
+          }
+        } else {
+          // Compute r
+          f_t r = b_bar[i];
+          for (i_t k = 0; k < a_bar.i.size(); k++) {
+            const i_t jj = a_bar.i[k];
+            if (has_upper[jj]) {
+              const f_t uj = original_lp_.upper[jj];
+              r -= uj * a_bar.x[k];
+              continue;
+            }
+            if (has_lower[jj]) {
+              const f_t lj = original_lp_.lower[jj];
+              r -= lj * a_bar.x[k];
             }
           }
-        }
 
-        // Compute the cut coefficients
-        std::vector<i_t> cut_indices;
-        cut_indices.reserve(a_bar.i.size());
-        for (i_t k = 0; k < a_bar.i.size(); k++) {
-          const i_t jj = a_bar.i[k];
-          const f_t aj = a_bar.x[k];
-          if (has_upper[jj]) {
-            if (var_types_[jj] == variable_type_t::INTEGER) {
-              // Upper intersect I
-              x_workspace[jj] -= f(-aj, r);
+          // Compute R
+          R = std::ceil(r) * (r - std::floor(r));
+          for (i_t k = 0; k < a_bar.i.size(); k++) {
+            const i_t jj = a_bar.i[k];
+            const f_t aj = a_bar.x[k];
+            if (has_upper[jj]) {
+              const f_t uj = original_lp_.upper[jj];
+              if (var_types_[jj] == variable_type_t::INTEGER) {
+                R -= f(-aj, r) * uj;
+              } else {
+                R -= h(-aj) * uj;
+              }
+            } else if (has_lower[jj]) {
+              const f_t lj = original_lp_.lower[jj];
+              if (var_types_[jj] == variable_type_t::INTEGER) {
+                R += f(aj, r) * lj;
+              } else {
+                R += h(aj) * lj;
+              }
+            }
+          }
+
+          // Compute the cut coefficients
+          for (i_t k = 0; k < a_bar.i.size(); k++) {
+            const i_t jj = a_bar.i[k];
+            const f_t aj = a_bar.x[k];
+            if (has_upper[jj]) {
+              if (var_types_[jj] == variable_type_t::INTEGER) {
+                // Upper intersect I
+                x_workspace[jj] -= f(-aj, r);
+                if (!x_mark[jj]) {
+                  x_mark[jj] = 1;
+                  cut_indices.push_back(jj);
+                }
+              } else {
+                // Upper intersect C
+                x_workspace[jj] -= h(-aj);
+                if (!x_mark[jj]) {
+                  x_mark[jj] = 1;
+                  cut_indices.push_back(jj);
+                }
+              }
+            } else if (var_types_[jj] == variable_type_t::INTEGER) {
+              // I \ Upper
+              x_workspace[jj] += f(aj, r);
               if (!x_mark[jj]) {
                 x_mark[jj] = 1;
                 cut_indices.push_back(jj);
               }
             } else {
-              // Upper intersect C
-              x_workspace[jj] -= h(-aj);
+              // C \ Upper
+              x_workspace[jj] += h(aj);
               if (!x_mark[jj]) {
                 x_mark[jj] = 1;
                 cut_indices.push_back(jj);
               }
             }
-          } else if (var_types_[jj] == variable_type_t::INTEGER) {
-            // I \ Upper
-            x_workspace[jj] -= f(aj, r);
-            if (!x_mark[jj]) {
-              x_mark[jj] = 1;
-              cut_indices.push_back(jj);
-            }
-          } else {
-            // C \ Upper
-            x_workspace[jj] += h(aj);
-            if (!x_mark[jj]) {
-              x_mark[jj] = 1;
-              cut_indices.push_back(jj);
-            }
           }
         }
-#endif
 
         sparse_vector_t<i_t, f_t> cut(original_lp_.num_cols, cut_indices.size());
         for (i_t k = 0; k < cut_indices.size(); k++) {
@@ -1437,7 +1447,6 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       csc_matrix_t<i_t, f_t> C_col(C.m, C.n, 0);
       C.to_compressed_col(C_col);
-
 #ifdef PRINT_CUTS
       C_col.print_matrix();
 #endif

From 9dea7ce5c8e6ac5a920450f5b4fead70e7a24b4d Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 25 Nov 2025 21:01:55 -0800
Subject: [PATCH 09/27] Add integer infeasibility info. Remove inactive cuts.
 Add mip_cut_passes parameter

---
 .../cuopt/linear_programming/constants.h      |   1 +
 .../mip/solver_settings.hpp                   |   1 +
 cpp/src/dual_simplex/basis_updates.cpp        |   2 +-
 cpp/src/dual_simplex/branch_and_bound.cpp     | 236 +++++++++++++++---
 cpp/src/dual_simplex/mip_node.hpp             |  10 +-
 .../dual_simplex/simplex_solver_settings.hpp  |   2 +
 cpp/src/dual_simplex/solve.cpp                |  13 +-
 cpp/src/math_optimization/solver_settings.cu  |   3 +-
 cpp/src/mip/solver.cu                         |   1 +
 9 files changed, 233 insertions(+), 36 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index cc051ab49..15082254e 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -57,6 +57,7 @@
 #define CUOPT_MIP_HEURISTICS_ONLY         "mip_heuristics_only"
 #define CUOPT_MIP_SCALING                 "mip_scaling"
 #define CUOPT_MIP_PRESOLVE                "mip_presolve"
+#define CUOPT_MIP_CUT_PASSES              "mip_cut_passes"
 #define CUOPT_SOLUTION_FILE               "solution_file"
 #define CUOPT_NUM_CPU_THREADS             "num_cpu_threads"
 #define CUOPT_USER_PROBLEM_FILE           "user_problem_file"
diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
index 2c62f1b44..72026d7d1 100644
--- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
@@ -81,6 +81,7 @@ class mip_solver_settings_t {
   f_t time_limit       = std::numeric_limits<f_t>::infinity();
   bool heuristics_only = false;
   i_t num_cpu_threads  = -1;  // -1 means use default number of threads in branch and bound
+  i_t max_cut_passes   = 10;  // number of cut passes to make
   bool log_to_console  = true;
   std::string log_file;
   std::string sol_file;
diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 2590c5226..fd70194e1 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -2267,7 +2267,7 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
                       q,
                       deficient,
                       slacks_needed) == -1) {
-    settings.log.debug("Initial factorization failed\n");
+    settings.log.printf("Initial factorization failed\n");
     basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
 
 #ifdef CHECK_BASIS_REPAIR
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index f9c323133..bb225ddbc 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -517,12 +517,13 @@ void branch_and_bound_t<i_t, f_t>::add_feasible_solution(f_t leaf_objective,
     f_t lower_bound = get_lower_bound();
     f_t obj         = compute_user_objective(original_lp_, upper_bound_);
     f_t lower       = compute_user_objective(original_lp_, lower_bound);
-    settings_.log.printf("%c%10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
+    settings_.log.printf("%c%10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
                          thread_type,
                          nodes_explored,
                          nodes_unexplored,
                          obj,
                          lower,
+                         0,
                          leaf_depth,
                          nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0,
                          user_mip_gap<f_t>(obj, lower).c_str(),
@@ -665,7 +666,7 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
 
       assert(leaf_vstatus.size() == leaf_problem.num_cols);
       search_tree.branch(
-        node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, original_lp_, log);
+        node_ptr, branch_var, leaf_solution.x[branch_var], leaf_num_fractional, leaf_vstatus, original_lp_, log);
       node_ptr->status = node_status_t::HAS_CHILDREN;
       return node_status_t::HAS_CHILDREN;
 
@@ -741,11 +742,12 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
       f_t user_lower       = compute_user_objective(original_lp_, root_objective_);
       std::string gap_user = user_mip_gap<f_t>(obj, user_lower);
 
-      settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
+      settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
                            nodes_explored,
                            nodes_unexplored,
                            obj,
                            user_lower,
+                           node->integer_infeasible,
                            node->depth,
                            nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0,
                            gap_user.c_str(),
@@ -841,11 +843,12 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t task_id,
         f_t obj              = compute_user_objective(original_lp_, upper_bound);
         f_t user_lower       = compute_user_objective(original_lp_, get_lower_bound());
         std::string gap_user = user_mip_gap<f_t>(obj, user_lower);
-        settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d   %7.1e     %s %9.2f\n",
+        settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d  %7.1e     %s %9.2f\n",
                              nodes_explored,
                              nodes_unexplored,
                              obj,
                              user_lower,
+                             node_ptr->integer_infeasible,
                              node_ptr->depth,
                              nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0,
                              gap_user.c_str(),
@@ -1065,6 +1068,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols);
 
   settings_.log.printf("Solving LP root relaxation\n");
+  i_t original_rows = original_lp_.num_rows;
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.inside_mip                = 1;
   lp_settings.scale_columns = false;
@@ -1126,13 +1130,21 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   csc_matrix_t<i_t, f_t> Arow(1, 1, 1);
   original_lp_.A.transpose(Arow);
 
-  for (i_t cut_pass = 0; cut_pass < 10; cut_pass++) {
+  if (num_fractional != 0) {
+    settings_.log.printf(
+      " | Explored | Unexplored |    Objective    |     Bound     | IntInf | Depth | Iter/Node |   Gap    "
+      "|  Time  |\n");
+  }
+
+  for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
+#ifdef PRINT_SOLUTION
       for (i_t j = 0; j < original_lp_.num_cols; j++) {
         if (var_types_[j] == variable_type_t::INTEGER) {
           settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]);
         }
       }
+#endif
       mutex_upper_.lock();
       incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
       upper_bound_ = root_objective_;
@@ -1155,10 +1167,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       }
       return mip_status_t::OPTIMAL;
     } else {
+#ifdef PRINT_FRACTIONAL_INFO
       settings_.log.printf("Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass);
       for (i_t j: fractional) {
         settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]);
       }
+#endif
       // Let's look for Gomory cuts
       // Compute b_bar
       std::vector<f_t> b_bar(original_lp_.num_rows);
@@ -1166,6 +1180,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       std::vector<f_t> nonbasic_mark(original_lp_.num_cols, 0);
       for (i_t j : nonbasic_list) {
+        if (j < 0 || j >= original_lp_.num_cols) {
+          settings_.log.printf("nonbasic_list out of bounds %d num_cols %d\n", j, original_lp_.num_cols);
+          exit(1);
+        }
         nonbasic_mark[j] = 1;
       }
 
@@ -1194,20 +1212,19 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         if (uj < inf) {
           if (uj - xstar_j <= xstar_j - lj) {
             has_upper[j] = 1;
-            settings_.log.printf("Variable %d in upper\n", j);
           } else {
             has_lower[j] = 1;
-            settings_.log.printf("Variable %d in lower\n", j);
           }
           continue;
         }
 
         if (lj > -inf) {
           has_lower[j] = 1;
-          settings_.log.printf("Variable %d in lower\n", j);
         }
       }
+#ifdef PRINT_COMPLEMENT_INFO
       settings_.log.printf("needs_complement %d\n", needs_complement);
+#endif
 
       csr_matrix_t<i_t, f_t> C(0, original_lp_.num_cols, 0);
       C.row_start[0] = 0;
@@ -1220,8 +1237,9 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         if (var_types_[j] != variable_type_t::INTEGER) { continue; }
         const f_t x_j = root_relax_soln_.x[j];
         if (std::abs(x_j - std::round(x_j)) < settings_.integer_tol) { continue; }
-
+#ifdef PRINT_CUT_INFO
         settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
+#endif
 #ifdef PRINT_BASIS
         for (i_t h = 0; h < basic_list.size(); h++) {
           settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
@@ -1298,9 +1316,18 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         a_bar.to_dense(a_bar_dense);
 
         f_t a_bar_dense_dot = dot<i_t, f_t>(a_bar_dense, root_relax_soln_.x);
-        settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]);
+        if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) {
+          settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]);
+          settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
+          exit(1);
+        }
 
-        settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
+        // Skip cuts that are shallow
+        const f_t shallow_tol = 1e-2;
+        if (std::abs(x_j - std::round(x_j)) < shallow_tol) {
+          //settings_.log.printf("Skipping shallow cut %d. b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j);
+          continue;
+        }
 
 #ifdef PRINT_BASE_INEQUALITY
         // Print out the base inequality
@@ -1396,10 +1423,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                 }
               } else {
                 // Upper intersect C
-                x_workspace[jj] -= h(-aj);
-                if (!x_mark[jj]) {
-                  x_mark[jj] = 1;
-                  cut_indices.push_back(jj);
+                f_t h_j = h(-aj);
+                if (h_j != 0.0) {
+                  x_workspace[jj] -= h_j;
+                  if (!x_mark[jj]) {
+                    x_mark[jj] = 1;
+                    cut_indices.push_back(jj);
+                  }
                 }
               }
             } else if (var_types_[jj] == variable_type_t::INTEGER) {
@@ -1411,10 +1441,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
               }
             } else {
               // C \ Upper
-              x_workspace[jj] += h(aj);
-              if (!x_mark[jj]) {
-                x_mark[jj] = 1;
-                cut_indices.push_back(jj);
+              f_t h_j = h(aj);
+              if (h_j != 0.0) {
+                x_workspace[jj] += h_j;
+                if (!x_mark[jj]) {
+                  x_mark[jj] = 1;
+                  cut_indices.push_back(jj);
+                }
               }
             }
           }
@@ -1454,13 +1487,15 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       C.check_matrix();
 #ifdef PRINT_CUT_RHS
       for (i_t k = 0; k < cut_rhs.size(); k++) {
-        lp_settings.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]);
+        settings_.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]);
       }
 #endif
 
-      lp_settings.log.printf("C nz %d\n", C.row_start[C.m]);
-      lp_settings.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size());
-      lp_settings.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
+#ifdef PRINT_CUT_INFO
+      settings_.log.printf("C nz %d\n", C.row_start[C.m]);
+      settings_.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size());
+      settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
+#endif
 
 #ifdef PRINT_OPTIMAL
       for (i_t j = 0; j < original_lp_.num_cols; j++) {
@@ -1471,16 +1506,22 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       // Check to see that this is a cut i.e C*x > d
       std::vector<f_t> Cx(C.m);
       matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx);
+      f_t min_cut_violation = inf;
       for (i_t k = 0; k < Cx.size(); k++) {
         //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]);
         if (Cx[k] <= cut_rhs[k]) {
-          lp_settings.log.printf("C*x <= d for cut %d\n", k);
+          settings_.log.printf("C*x <= d for cut %d\n", k);
           exit(1);
         }
+        min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]);
       }
+#ifdef PRINT_MIN_CUT_VIOLATION
+      settings_.log.printf("Min cut violation %e\n", min_cut_violation);
+#endif
 
       // Resolve the LP with the new cuts
-      lp_settings.log.printf("Solving LP with %d cuts\n", C.m);
+      settings_.log.printf("Solving LP with %d cuts (%d nonzeros). Total constraints %d\n", C.m, C.row_start[C.m], C.m + original_lp_.num_rows);
+      lp_settings.log.log = false;
 
       lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time,
                                                               lp_settings,
@@ -1493,7 +1534,8 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                                                               nonbasic_list,
                                                               root_vstatus_,
                                                               edge_norms_);
-
+      settings_.log.printf("Cut LP iterations %d. A nz %d\n", root_relax_soln_.iterations, original_lp_.A.col_start[original_lp_.A.n]);
+      stats_.total_lp_iters += root_relax_soln_.iterations;
       root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
 
       if (cut_status != lp_status_t::OPTIMAL) {
@@ -1503,9 +1545,146 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       original_lp_.A.transpose(Arow);
       var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS);
+      std::vector<i_t> cuts_to_remove;
+      cuts_to_remove.reserve(original_lp_.num_rows - original_rows);
+      std::vector<i_t> slacks_to_remove;
+      slacks_to_remove.reserve(original_lp_.num_rows - original_rows);
+      const f_t dual_tol = 1e-10;
+      for (i_t k = original_rows; k < original_lp_.num_rows; k++) {
+        if (std::abs(root_relax_soln_.y[k]) < dual_tol) {
+          const i_t row_start = Arow.col_start[k];
+          const i_t row_end   = Arow.col_start[k + 1];
+          i_t last_slack = -1;
+          const f_t slack_tol = 1e-3;
+          for (i_t p = row_start; p < row_end; p++) {
+            const i_t jj = Arow.i[p];
+            const i_t col_len = original_lp_.A.col_start[jj + 1] - original_lp_.A.col_start[jj];
+            if (var_types_[jj] == variable_type_t::CONTINUOUS &&
+                Arow.x[p] == 1.0 &&
+                original_lp_.lower[jj] == 0.0 &&
+                original_lp_.upper[jj] == inf &&
+                root_vstatus_[jj] == variable_status_t::BASIC &&
+                col_len == 1 &&
+                root_relax_soln_.x[jj] > slack_tol) {
+              last_slack = jj;
+            }
+          }
+          if (last_slack != -1) {
+            cuts_to_remove.push_back(k);
+            slacks_to_remove.push_back(last_slack);
+          }
+        }
+      }
+
+      if (cuts_to_remove.size() > 0) {
+        settings_.log.printf("Removing %d cuts\n", cuts_to_remove.size());
+        std::vector<i_t> marked_rows(original_lp_.num_rows, 0);
+        for (i_t i : cuts_to_remove) {
+          marked_rows[i] = 1;
+        }
+        std::vector<i_t> marked_cols(original_lp_.num_cols, 0);
+        for (i_t j : slacks_to_remove) {
+          marked_cols[j] = 1;
+        }
+
+        std::vector<f_t> new_rhs(original_lp_.num_rows - cuts_to_remove.size());
+        std::vector<f_t> new_solution_y(original_lp_.num_rows - cuts_to_remove.size());
+        i_t h = 0;
+        for (i_t i = 0; i < original_lp_.num_rows; i++) {
+          if (!marked_rows[i]) {
+            new_rhs[h] = original_lp_.rhs[i];
+            new_solution_y[h] = root_relax_soln_.y[i];
+            h++;
+          }
+        }
+
+
+        Arow.remove_columns(marked_rows);
+        Arow.transpose(original_lp_.A);
+
+        std::vector<f_t> new_objective(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<f_t> new_lower(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<f_t> new_upper(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<variable_type_t> new_var_types(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<variable_status_t> new_vstatus(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<i_t> new_basic_list;
+        new_basic_list.reserve(original_lp_.num_rows - slacks_to_remove.size());
+        std::vector<i_t> new_nonbasic_list;
+        new_nonbasic_list.reserve(nonbasic_list.size());
+        std::vector<f_t> new_solution_x(original_lp_.num_cols - slacks_to_remove.size());
+        std::vector<f_t> new_solution_z(original_lp_.num_cols - slacks_to_remove.size());
+        h = 0;
+        for (i_t k = 0; k < original_lp_.num_cols; k++) {
+          if (!marked_cols[k]) {
+            new_objective[h] = original_lp_.objective[k];
+            new_lower[h] = original_lp_.lower[k];
+            new_upper[h] = original_lp_.upper[k];
+            new_var_types[h] = var_types_[k];
+            new_vstatus[h] = root_vstatus_[k];
+            new_solution_x[h] = root_relax_soln_.x[k];
+            new_solution_z[h] = root_relax_soln_.z[k];
+            if (new_vstatus[h] != variable_status_t::BASIC) {
+              new_nonbasic_list.push_back(h);
+            } else {
+              new_basic_list.push_back(h);
+            }
+            h++;
+          }
+        }
+        original_lp_.A.remove_columns(marked_cols);
+        original_lp_.A.transpose(Arow);
+        original_lp_.objective = new_objective;
+        original_lp_.lower = new_lower;
+        original_lp_.upper = new_upper;
+        original_lp_.rhs = new_rhs;
+        var_types_ = new_var_types;
+        original_lp_.num_cols = original_lp_.A.n;
+        original_lp_.num_rows = original_lp_.A.m;
+        basic_list = new_basic_list;
+        nonbasic_list = new_nonbasic_list;
+        root_vstatus_ = new_vstatus;
+        root_relax_soln_.x = new_solution_x;
+        root_relax_soln_.y = new_solution_y;
+        root_relax_soln_.z = new_solution_z;
+
+#ifdef PRINT_SIZES
+        settings_.log.printf("A %d x %d\n", original_lp_.A.m, original_lp_.A.n);
+        settings_.log.printf("basic_list size %d\n", basic_list.size());
+        settings_.log.printf("nonbasic_list size %d\n", nonbasic_list.size());
+        settings_.log.printf("root_vstatus_ size %d\n", root_vstatus_.size());
+        settings_.log.printf("original_lp_.num_rows %d\n", original_lp_.num_rows);
+        settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
+        settings_.log.printf("root_relax_soln_.x size %d\n", root_relax_soln_.x.size());
+        settings_.log.printf("root_relax_soln_.y size %d\n", root_relax_soln_.y.size());
+        settings_.log.printf("root_relax_soln_.z size %d\n", root_relax_soln_.z.size());
+        settings_.log.printf("rhs size %ld\n", original_lp_.rhs.size());
+        settings_.log.printf("lower size %ld\n", original_lp_.lower.size());
+        settings_.log.printf("upper size %ld\n", original_lp_.upper.size());
+        settings_.log.printf("objective size %ld\n", original_lp_.objective.size());
+        settings_.log.printf("var_types_ size %ld\n", var_types_.size());
+#endif
+
+        basis_update.resize(original_lp_.num_rows);
+        basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_);
+      }
 
       fractional.clear();
       num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
+
+      // TODO: Get upper bound from heuristics
+      std::string gap = num_fractional != 0 ? "  -  " : "0.0%";
+      f_t obj = num_fractional != 0 ? inf : compute_user_objective(original_lp_, root_objective_);
+
+      settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d   %7.1e     %s %9.2f\n",
+        0,
+        0,
+        obj,
+        compute_user_objective(original_lp_, root_objective_),
+        num_fractional,
+        0,
+        stats_.total_lp_iters.load(),
+        gap.c_str(),
+        toc(stats_.start_time));
     }
   }
 
@@ -1534,6 +1713,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   search_tree.branch(&search_tree.root,
                      branch_var,
                      root_relax_soln_.x[branch_var],
+                     num_fractional,
                      root_vstatus_,
                      original_lp_,
                      log);
@@ -1544,9 +1724,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     settings_.num_diving_threads,
     settings_.num_threads);
 
-  settings_.log.printf(
-    " | Explored | Unexplored |    Objective    |     Bound     | Depth | Iter/Node |   Gap    "
-    "|  Time  |\n");
+
 
 
   stats_.nodes_explored       = 0;
diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp
index 9034bfa22..f18ae0072 100644
--- a/cpp/src/dual_simplex/mip_node.hpp
+++ b/cpp/src/dual_simplex/mip_node.hpp
@@ -41,6 +41,7 @@ class mip_node_t {
       node_id(0),
       branch_var(-1),
       branch_dir(-1),
+      integer_infeasible(-1),
       vstatus(basis)
   {
     children[0] = nullptr;
@@ -53,6 +54,7 @@ class mip_node_t {
              i_t branch_variable,
              i_t branch_direction,
              f_t branch_var_value,
+             i_t integer_inf,
              const std::vector<variable_status_t>& basis)
     : status(node_status_t::ACTIVE),
       lower_bound(parent_node->lower_bound),
@@ -62,8 +64,8 @@ class mip_node_t {
       branch_var(branch_variable),
       branch_dir(branch_direction),
       fractional_val(branch_var_value),
+      integer_infeasible(integer_inf),
       vstatus(basis)
-
   {
     branch_var_lower =
       branch_direction == 0 ? problem.lower[branch_var] : std::ceil(branch_var_value);
@@ -217,6 +219,7 @@ class mip_node_t {
   f_t branch_var_lower;
   f_t branch_var_upper;
   f_t fractional_val;
+  i_t integer_infeasible;
 
   mip_node_t<i_t, f_t>* parent;
   std::unique_ptr<mip_node_t> children[2];
@@ -272,6 +275,7 @@ class search_tree_t {
   void branch(mip_node_t<i_t, f_t>* parent_node,
               const i_t branch_var,
               const f_t fractional_val,
+              const i_t integer_infeasible,
               const std::vector<variable_status_t>& parent_vstatus,
               const lp_problem_t<i_t, f_t>& original_lp,
               logger_t& log)
@@ -280,13 +284,13 @@ class search_tree_t {
 
     // down child
     auto down_child = std::make_unique<mip_node_t<i_t, f_t>>(
-      original_lp, parent_node, ++id, branch_var, 0, fractional_val, parent_vstatus);
+      original_lp, parent_node, ++id, branch_var, 0, fractional_val, integer_infeasible, parent_vstatus);
 
     graphviz_edge(log, parent_node, down_child.get(), branch_var, 0, std::floor(fractional_val));
 
     // up child
     auto up_child = std::make_unique<mip_node_t<i_t, f_t>>(
-      original_lp, parent_node, ++id, branch_var, 1, fractional_val, parent_vstatus);
+      original_lp, parent_node, ++id, branch_var, 1, fractional_val, integer_infeasible, parent_vstatus);
 
     graphviz_edge(log, parent_node, up_child.get(), branch_var, 1, std::ceil(fractional_val));
 
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 8e54c40bb..6fffbfe1f 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -69,6 +69,7 @@ struct simplex_solver_settings_t {
       num_threads(omp_get_max_threads() - 1),
       num_bfs_threads(std::min(num_threads / 4, 1)),
       num_diving_threads(std::min(num_threads - num_bfs_threads, 1)),
+      max_cut_passes(10),
       random_seed(0),
       inside_mip(0),
       solution_callback(nullptr),
@@ -134,6 +135,7 @@ struct simplex_solver_settings_t {
   i_t random_seed;                 // random seed
   i_t num_bfs_threads;             // number of threads dedicated to the best-first search
   i_t num_diving_threads;          // number of threads dedicated to diving
+  i_t max_cut_passes;              // number of cut passes to make
   i_t inside_mip;  // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node
   std::function<void(std::vector<f_t>&, f_t)> solution_callback;
   std::function<void(const std::vector<f_t>&, f_t)> node_processed_callback;
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 3fba1a5ac..45a4967e9 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -361,6 +361,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     lp.upper[j]     = inf;
     lp.objective[j] = 0.0;
   }
+  settings.log.printf("Done adding slacks\n");
   new_A_col.col_start[lp.num_cols + p] = nz;
   new_A_col.n                          = lp.num_cols + p;
 
@@ -376,6 +377,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     const i_t h = k - old_rows;
     lp.rhs[k]   = cut_rhs[h];
   }
+  settings.log.printf("Done adding rhs\n");
 
   // Construct C_B = C(:, basic_list)
   std::vector<i_t> C_col_degree(lp.num_cols, 0);
@@ -388,12 +390,17 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     }
     C_col_degree[j]++;
   }
+  settings.log.printf("Done computing C_col_degree\n");
 
   std::vector<i_t> in_basis(old_cols, -1);
   const i_t num_basic = static_cast<i_t>(basic_list.size());
   i_t C_B_nz          = 0;
   for (i_t k = 0; k < num_basic; k++) {
     const i_t j = basic_list[k];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf("basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols);
+      exit(1);
+    }
     in_basis[j] = k;
     if (j < cuts.n)
     {
@@ -444,7 +451,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   const f_t err = Diff.norm1();
   settings.log.printf("After || B - L*U || %e\n", err);
   if (err > 1e-6) {
-    Diff.print_matrix();
+    settings.log.printf("Diff matrix\n");
+    //Diff.print_matrix();
       exit(1);
   }
   // Adjust the vstatus
@@ -475,7 +483,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
                                                           solution,
                                                           iter,
                                                           edge_norms);
-
+  settings.log.printf("Phase 2 iterations %d\n", iter);
+  solution.iterations = iter;
   lp_status_t lp_status;
   if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; }
   if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; }
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index b5da4f095..0d3874321 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -86,7 +86,8 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_FOLDING, &pdlp_settings.folding, -1, 1, -1},
     {CUOPT_DUALIZE, &pdlp_settings.dualize, -1, 1, -1},
     {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1},
-    {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1}
+    {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1},
+    {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits<i_t>::max(), 10}
   };
 
     // Bool parameters
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index e8978e17e..68fb0c698 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -168,6 +168,7 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
     branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap;
     branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap;
     branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance;
+    branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes;
 
     if (context.settings.num_cpu_threads < 0) {
       branch_and_bound_settings.num_threads = omp_get_max_threads() - 1;

From 42af00cc1c80ded53b891d14b41a9fb17f8096f7 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 2 Dec 2025 10:08:36 -0800
Subject: [PATCH 10/27] Remove small coefficients from cut

---
 cpp/src/dual_simplex/branch_and_bound.cpp | 59 ++++++++++++++++++-----
 cpp/src/dual_simplex/solve.cpp            |  4 ++
 2 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index bb225ddbc..ae53d1ba3 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1329,6 +1329,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
           continue;
         }
 
+        const f_t f_val = b_bar[i] - std::floor(b_bar[i]);
+        if (f_val < 0.01 || f_val > 0.99) {
+          settings_.log.printf("Skipping cut %d. b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val);
+          continue;
+        }
+
 #ifdef PRINT_BASE_INEQUALITY
         // Print out the base inequality
         for (i_t k = 0; k < a_bar.i.size(); k++) {
@@ -1359,13 +1365,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
             f_t aj       = a_bar.x[k];
             if (var_types_[jj] == variable_type_t::INTEGER) {
               x_workspace[jj] += f(aj, b_bar[i]);
-              if (!x_mark[jj]) {
+              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
                 x_mark[jj] = 1;
                 cut_indices.push_back(jj);
               }
             } else {
               x_workspace[jj] += h(aj);
-              if (!x_mark[jj]) {
+              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
                 x_mark[jj] = 1;
                 cut_indices.push_back(jj);
               }
@@ -1417,7 +1423,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
               if (var_types_[jj] == variable_type_t::INTEGER) {
                 // Upper intersect I
                 x_workspace[jj] -= f(-aj, r);
-                if (!x_mark[jj]) {
+                if (!x_mark[jj] && x_workspace[jj] != 0.0) {
                   x_mark[jj] = 1;
                   cut_indices.push_back(jj);
                 }
@@ -1435,7 +1441,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
             } else if (var_types_[jj] == variable_type_t::INTEGER) {
               // I \ Upper
               x_workspace[jj] += f(aj, r);
-              if (!x_mark[jj]) {
+              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
                 x_mark[jj] = 1;
                 cut_indices.push_back(jj);
               }
@@ -1453,11 +1459,31 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
           }
         }
 
-        sparse_vector_t<i_t, f_t> cut(original_lp_.num_cols, cut_indices.size());
+        sparse_vector_t<i_t, f_t> cut(original_lp_.num_cols, 0);
+        cut.i.reserve(cut_indices.size());
+        cut.x.reserve(cut_indices.size());
         for (i_t k = 0; k < cut_indices.size(); k++) {
           const i_t jj = cut_indices[k];
-          cut.i[k]     = jj;
-          cut.x[k]     = x_workspace[jj];
+
+          // Check for small coefficients
+          const f_t aj = x_workspace[jj];
+          if (std::abs(aj) < 1e-6) {
+            if (aj >= 0.0 && original_lp_.upper[jj] < inf) {
+              // Move this to the right-hand side
+              //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.upper[jj], jj);
+              R -= aj * original_lp_.upper[jj];
+              continue;
+            } else if (aj <= 0.0 && original_lp_.lower[jj] > -inf) {
+              //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.lower[jj], jj);
+              R += aj * original_lp_.lower[jj];
+              continue;
+            }
+            else {
+              //settings_.log.printf("Small coefficient %e for variable %d lower %e upper %e\n", aj, jj, original_lp_.lower[jj], original_lp_.upper[jj]);
+            }
+          }
+          cut.i.push_back(jj);
+          cut.x.push_back(x_workspace[jj]);
         }
 
         // Clear the workspace
@@ -1466,14 +1492,21 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
           x_mark[jj]      = 0;
         }
 
+        if (cut.x.size() == 0)
+        {
+          continue;
+        }
+        if (cut.x.size() >= 0.7 * original_lp_.num_cols)
+       {
+          settings_.log.printf("Cut %d has %d nonzeros. Skipping because it is too dense %.2f\n", i, cut.x.size(), static_cast<f_t>(cut.x.size()) / original_lp_.num_cols);
+          continue;
+        }
+
         // Sort the coefficients by their index
         cut.sort();
         // The new cut is: g'*x >= R
         // But we want to have it in the form h'*x <= b
-        for (i_t k = 0; k < cut.x.size(); k++) {
-          cut.x[k] *= -1.0;
-        }
-
+        cut.negate();
         C.append_row(cut);
         cut_rhs.push_back(-R);
       }
@@ -1663,6 +1696,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         settings_.log.printf("objective size %ld\n", original_lp_.objective.size());
         settings_.log.printf("var_types_ size %ld\n", var_types_.size());
 #endif
+        settings_.log.printf("After removal %d rows %d columns %d nonzeros\n",
+                            original_lp_.num_rows,
+                            original_lp_.num_cols,
+                            original_lp_.A.col_start[original_lp_.A.n]);
 
         basis_update.resize(original_lp_.num_rows);
         basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_);
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index 45a4967e9..fcd8a6386 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -312,6 +312,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
   // adds the cuts into the LP and solves again.
 
 
+#ifdef CHECK_BASIS
   {
     csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
     basis_update.multiply_lu(Btest);
@@ -325,6 +326,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
       exit(1);
     }
   }
+#endif
 
   const i_t p = cuts.m;
   if (cut_rhs.size() != static_cast<size_t>(p)) {
@@ -439,6 +441,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     basic_list[j] = h++;
   }
 
+#ifdef CHECK_BASIS
   // Check the basis update
   csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
   basis_update.multiply_lu(Btest);
@@ -455,6 +458,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time,
     //Diff.print_matrix();
       exit(1);
   }
+#endif
   // Adjust the vstatus
   vstatus.resize(lp.num_cols);
   for (i_t j = old_cols; j < lp.num_cols; j++) {

From dddf42dcb64645eea9695a999b62425443a10aad Mon Sep 17 00:00:00 2001
From: Alice Boucher <160623740+aliceb-nv@users.noreply.github.com>
Date: Thu, 11 Dec 2025 17:52:36 +0100
Subject: [PATCH 11/27] CI crash fixes (#691)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary by CodeRabbit

## Release Notes

* **Performance**
  * Optimized branch-and-bound algorithm with improved search termination conditions

* **Improvements**
  * Enhanced concurrency control mechanisms across solver components
  * Improved logger initialization and lifecycle management for better resource handling

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

Authors:
  - Alice Boucher (https://github.com/aliceb-nv)

Approvers:
  - Nicolas Blin (https://github.com/Kh4ster)

URL: https://github.com/NVIDIA/cuopt/pull/691
---
 .../pdlp/solver_settings.hpp                  |  2 +-
 cpp/src/dual_simplex/branch_and_bound.cpp     |  5 ++++
 cpp/src/dual_simplex/branch_and_bound.hpp     |  4 +--
 .../dual_simplex/simplex_solver_settings.hpp  |  4 +--
 cpp/src/linear_programming/solve.cu           |  2 +-
 cpp/src/mip/diversity/diversity_manager.cuh   |  2 +-
 cpp/src/mip/relaxed_lp/relaxed_lp.cuh         | 16 ++++++------
 cpp/src/utilities/logger.cpp                  | 26 ++++++++++++++++---
 cpp/src/utilities/logger.hpp                  |  6 +++--
 9 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
index ea697e1e1..76388504e 100644
--- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp
@@ -212,7 +212,7 @@ class pdlp_solver_settings_t {
   method_t method{method_t::Concurrent};
   bool inside_mip{false};
   // For concurrent termination
-  volatile int* concurrent_halt{nullptr};
+  std::atomic<int>* concurrent_halt{nullptr};
   static constexpr f_t minimal_absolute_tolerance = 1.0e-12;
 
  private:
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 77acca8f7..6161f4d3f 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1135,6 +1135,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
       if (get_upper_bound() < start_node->node.lower_bound) { continue; }
 
       bool recompute_bounds_and_basis = true;
+      i_t nodes_explored              = 0;
       search_tree_t<i_t, f_t> subtree(std::move(start_node->node));
       std::deque<mip_node_t<i_t, f_t>*> stack;
       stack.push_front(&subtree.root);
@@ -1152,6 +1153,8 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
 
         if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; }
 
+        if (nodes_explored >= 1000) { break; }
+
         node_solve_info_t status = solve_node(node_ptr,
                                               subtree,
                                               leaf_problem,
@@ -1165,6 +1168,8 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
                                               start_node->upper,
                                               log);
 
+        nodes_explored++;
+
         recompute_bounds_and_basis = !has_children(status);
 
         if (status == node_solve_info_t::TIME_LIMIT) {
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 7891711f7..38438cc9e 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -113,7 +113,7 @@ class branch_and_bound_t {
   f_t get_lower_bound();
   i_t get_heap_size();
   bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; }
-  volatile int* get_root_concurrent_halt() { return &root_concurrent_halt_; }
+  std::atomic<int>* get_root_concurrent_halt() { return &root_concurrent_halt_; }
   void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; }
   lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings);
 
@@ -170,7 +170,7 @@ class branch_and_bound_t {
   std::vector<f_t> edge_norms_;
   std::atomic<bool> root_crossover_solution_set_{false};
   bool enable_concurrent_lp_root_solve_{false};
-  volatile int root_concurrent_halt_{0};
+  std::atomic<int> root_concurrent_halt_{0};
 
   // Pseudocosts
   pseudo_costs_t<i_t, f_t> pc_;
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 98be9d4cb..a1cc049e7 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -145,8 +145,8 @@ struct simplex_solver_settings_t {
   std::function<void()> heuristic_preemption_callback;
   std::function<void(std::vector<f_t>&, std::vector<f_t>&, f_t)> set_simplex_solution_callback;
   mutable logger_t log;
-  volatile int* concurrent_halt;  // if nullptr ignored, if !nullptr, 0 if solver should
-                                  // continue, 1 if solver should halt
+  std::atomic<int>* concurrent_halt;  // if nullptr ignored, if !nullptr, 0 if solver should
+                                      // continue, 1 if solver should halt
 };
 
 }  // namespace cuopt::linear_programming::dual_simplex
diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu
index ed141a0c4..ab418bf5a 100644
--- a/cpp/src/linear_programming/solve.cu
+++ b/cpp/src/linear_programming/solve.cu
@@ -306,7 +306,7 @@ void setup_device_symbols(rmm::cuda_stream_view stream_view)
   detail::set_pdlp_hyper_parameters(stream_view);
 }
 
-volatile int global_concurrent_halt;
+std::atomic<int> global_concurrent_halt{0};
 
 template <typename i_t, typename f_t>
 optimization_problem_solution_t<i_t, f_t> convert_dual_simplex_sol(
diff --git a/cpp/src/mip/diversity/diversity_manager.cuh b/cpp/src/mip/diversity/diversity_manager.cuh
index 4a78f6cff..9f3b4c90f 100644
--- a/cpp/src/mip/diversity/diversity_manager.cuh
+++ b/cpp/src/mip/diversity/diversity_manager.cuh
@@ -95,7 +95,7 @@ class diversity_manager_t {
   // mutex for the simplex solution update
   std::mutex relaxed_solution_mutex;
   // atomic for signalling pdlp to stop
-  volatile int global_concurrent_halt{0};
+  std::atomic<int> global_concurrent_halt{0};
 
   rins_t<i_t, f_t> rins;
 
diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh
index 01931a3dd..0094f5982 100644
--- a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh
+++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh
@@ -17,14 +17,14 @@
 namespace cuopt::linear_programming::detail {
 
 struct relaxed_lp_settings_t {
-  double tolerance              = 1e-4;
-  double time_limit             = 1.0;
-  bool check_infeasibility      = true;
-  bool return_first_feasible    = false;
-  bool save_state               = true;
-  bool per_constraint_residual  = true;
-  bool has_initial_primal       = true;
-  volatile int* concurrent_halt = nullptr;
+  double tolerance                  = 1e-4;
+  double time_limit                 = 1.0;
+  bool check_infeasibility          = true;
+  bool return_first_feasible        = false;
+  bool save_state                   = true;
+  bool per_constraint_residual      = true;
+  bool has_initial_primal           = true;
+  std::atomic<int>* concurrent_halt = nullptr;
 };
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/utilities/logger.cpp b/cpp/src/utilities/logger.cpp
index a16c49c11..217f9c64c 100644
--- a/cpp/src/utilities/logger.cpp
+++ b/cpp/src/utilities/logger.cpp
@@ -137,9 +137,26 @@ void reset_default_logger()
   default_logger().flush_on(rapids_logger::level_enum::debug);
 }
 
+// Guard object whose destructor resets the logger
+struct logger_config_guard {
+  ~logger_config_guard() { cuopt::reset_default_logger(); }
+};
+
+// Weak reference to detect if any init_logger_t instance is still alive
+static std::weak_ptr<logger_config_guard> g_active_guard;
+static std::mutex g_guard_mutex;
+
 init_logger_t::init_logger_t(std::string log_file, bool log_to_console)
 {
-  // until this function is called, the default sink is the buffer sink
+  std::lock_guard<std::mutex> lock(g_guard_mutex);
+
+  auto existing_guard = g_active_guard.lock();
+  if (existing_guard) {
+    // Reuse existing configuration, just hold a reference to keep it alive
+    guard_ = existing_guard;
+    return;
+  }
+
   cuopt::default_logger().sinks().clear();
 
   // re-initialize sinks
@@ -164,8 +181,11 @@ init_logger_t::init_logger_t(std::string log_file, bool log_to_console)
   for (const auto& entry : buffered_messages) {
     cuopt::default_logger().log(entry.level, entry.msg.c_str());
   }
-}
 
-init_logger_t::~init_logger_t() { cuopt::reset_default_logger(); }
+  // Create guard and store weak reference for future instances to find
+  auto guard     = std::make_shared<logger_config_guard>();
+  g_active_guard = guard;
+  guard_         = guard;
+}
 
 }  // namespace cuopt
diff --git a/cpp/src/utilities/logger.hpp b/cpp/src/utilities/logger.hpp
index 13c5e36e3..08556a4c7 100644
--- a/cpp/src/utilities/logger.hpp
+++ b/cpp/src/utilities/logger.hpp
@@ -33,11 +33,13 @@ rapids_logger::logger& default_logger();
  */
 void reset_default_logger();
 
+// Ref-counted logger initializer
 class init_logger_t {
+  // Using shared_ptr for ref-counting
+  std::shared_ptr<void> guard_;
+
  public:
   init_logger_t(std::string log_file, bool log_to_console);
-
-  ~init_logger_t();
 };
 
 }  // namespace cuopt

From d97ff6b4762f9df4459534ccf56d70c8e172bc81 Mon Sep 17 00:00:00 2001
From: Jake Awe <jawe@nvidia.com>
Date: Thu, 11 Dec 2025 13:28:18 -0600
Subject: [PATCH 12/27] REL v25.12.00 release

---
 .pre-commit-config.yaml                       |  9 ++-
 .../all_cuda-129_arch-aarch64.yaml            | 14 ++---
 .../all_cuda-129_arch-x86_64.yaml             | 14 ++---
 .../all_cuda-130_arch-aarch64.yaml            | 14 ++---
 .../all_cuda-130_arch-x86_64.yaml             | 14 ++---
 dependencies.yaml                             | 58 +++++++++----------
 .../cuopt/linear_programming/pyproject.toml   |  6 +-
 python/cuopt/pyproject.toml                   | 26 ++++-----
 python/cuopt_self_hosted/pyproject.toml       |  2 +-
 python/cuopt_server/pyproject.toml            |  2 +-
 python/libcuopt/pyproject.toml                | 12 ++--
 11 files changed, 87 insertions(+), 84 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 799eb291f..447b03a32 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -56,6 +56,11 @@ repos:
   - repo: https://github.com/rapidsai/pre-commit-hooks
     rev: v1.2.1
     hooks:
+      - id: verify-alpha-spec
+        args:
+          - --fix
+          - --mode
+          - release
       - id: verify-copyright
         args: [--fix, --spdx]
         files: |
@@ -83,7 +88,5 @@ repos:
         entry: python ci/utils/update_doc_versions.py
         language: system
         files: docs/cuopt/source/versions1.json
-
-
 default_language_version:
-      python: python3
+  python: python3
diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml
index 59d6b43c0..7a4a54a1d 100644
--- a/conda/environments/all_cuda-129_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cuda-python>=12.9.2,<13.0a0
 - cuda-sanitizer-api
 - cuda-version=12.9
-- cudf==25.12.*,>=0.0.0a0
+- cudf==25.12.*
 - cupy>=13.6.0
 - cxx-compiler
 - cython>=3.0.3
@@ -35,8 +35,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.12.*,>=0.0.0a0
-- librmm==25.12.*,>=0.0.0a0
+- libraft-headers==25.12.*
+- librmm==25.12.*
 - make
 - msgpack-numpy==0.4.8
 - msgpack-python==1.1.0
@@ -53,16 +53,16 @@ dependencies:
 - pip
 - pre-commit
 - psutil>=6.0.0
-- pylibraft==25.12.*,>=0.0.0a0
+- pylibraft==25.12.*
 - pyrsistent
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
-- rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.2.*,>=0.0.0a0
+- rapids-dask-dependency==25.12.*
+- rapids-logger==0.2.*
 - requests
-- rmm==25.12.*,>=0.0.0a0
+- rmm==25.12.*
 - scikit-build-core>=0.10.0
 - sphinx
 - sphinx-copybutton
diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml
index 4aad50d00..6c5ae382e 100644
--- a/conda/environments/all_cuda-129_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cuda-python>=12.9.2,<13.0a0
 - cuda-sanitizer-api
 - cuda-version=12.9
-- cudf==25.12.*,>=0.0.0a0
+- cudf==25.12.*
 - cupy>=13.6.0
 - cxx-compiler
 - cython>=3.0.3
@@ -35,8 +35,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.12.*,>=0.0.0a0
-- librmm==25.12.*,>=0.0.0a0
+- libraft-headers==25.12.*
+- librmm==25.12.*
 - make
 - msgpack-numpy==0.4.8
 - msgpack-python==1.1.0
@@ -53,16 +53,16 @@ dependencies:
 - pip
 - pre-commit
 - psutil>=6.0.0
-- pylibraft==25.12.*,>=0.0.0a0
+- pylibraft==25.12.*
 - pyrsistent
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
-- rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.2.*,>=0.0.0a0
+- rapids-dask-dependency==25.12.*
+- rapids-logger==0.2.*
 - requests
-- rmm==25.12.*,>=0.0.0a0
+- rmm==25.12.*
 - scikit-build-core>=0.10.0
 - sphinx
 - sphinx-copybutton
diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml
index 5adb8aeae..82c250f01 100644
--- a/conda/environments/all_cuda-130_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-130_arch-aarch64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cuda-python>=13.0.1,<14.0a0
 - cuda-sanitizer-api
 - cuda-version=13.0
-- cudf==25.12.*,>=0.0.0a0
+- cudf==25.12.*
 - cupy>=13.6.0
 - cxx-compiler
 - cython>=3.0.3
@@ -35,8 +35,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.12.*,>=0.0.0a0
-- librmm==25.12.*,>=0.0.0a0
+- libraft-headers==25.12.*
+- librmm==25.12.*
 - make
 - msgpack-numpy==0.4.8
 - msgpack-python==1.1.0
@@ -53,16 +53,16 @@ dependencies:
 - pip
 - pre-commit
 - psutil>=6.0.0
-- pylibraft==25.12.*,>=0.0.0a0
+- pylibraft==25.12.*
 - pyrsistent
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
-- rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.2.*,>=0.0.0a0
+- rapids-dask-dependency==25.12.*
+- rapids-logger==0.2.*
 - requests
-- rmm==25.12.*,>=0.0.0a0
+- rmm==25.12.*
 - scikit-build-core>=0.10.0
 - sphinx
 - sphinx-copybutton
diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml
index 11c24d589..e89e322d6 100644
--- a/conda/environments/all_cuda-130_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-130_arch-x86_64.yaml
@@ -19,7 +19,7 @@ dependencies:
 - cuda-python>=13.0.1,<14.0a0
 - cuda-sanitizer-api
 - cuda-version=13.0
-- cudf==25.12.*,>=0.0.0a0
+- cudf==25.12.*
 - cupy>=13.6.0
 - cxx-compiler
 - cython>=3.0.3
@@ -35,8 +35,8 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
-- libraft-headers==25.12.*,>=0.0.0a0
-- librmm==25.12.*,>=0.0.0a0
+- libraft-headers==25.12.*
+- librmm==25.12.*
 - make
 - msgpack-numpy==0.4.8
 - msgpack-python==1.1.0
@@ -53,16 +53,16 @@ dependencies:
 - pip
 - pre-commit
 - psutil>=6.0.0
-- pylibraft==25.12.*,>=0.0.0a0
+- pylibraft==25.12.*
 - pyrsistent
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
-- rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.2.*,>=0.0.0a0
+- rapids-dask-dependency==25.12.*
+- rapids-logger==0.2.*
 - requests
-- rmm==25.12.*,>=0.0.0a0
+- rmm==25.12.*
 - scikit-build-core>=0.10.0
 - sphinx
 - sphinx-copybutton
diff --git a/dependencies.yaml b/dependencies.yaml
index 393b99293..c46f11b74 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -297,8 +297,8 @@ dependencies:
         packages:
           - boost
           - cpp-argparse
-          - librmm==25.12.*,>=0.0.0a0
-          - libraft-headers==25.12.*,>=0.0.0a0
+          - librmm==25.12.*
+          - libraft-headers==25.12.*
           - tbb-devel
           - zlib
           - bzip2
@@ -313,7 +313,7 @@ dependencies:
     common:
       - output_types: [conda]
         packages:
-          - libcuopt-tests==25.12.*,>=0.0.0a0
+          - libcuopt-tests==25.12.*
   build_wheels:
     common:
       - output_types: [requirements, pyproject]
@@ -350,7 +350,7 @@ dependencies:
         packages:
           - numba-cuda>=0.19.1,<0.20.0a0
           - numba>=0.60.0
-          - rapids-dask-dependency==25.12.*,>=0.0.0a0
+          - rapids-dask-dependency==25.12.*
           - &pandas pandas>=2.0
       - output_types: requirements
         packages:
@@ -420,7 +420,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &libcuopt_unsuffixed libcuopt==25.12.*,>=0.0.0a0
+          - &libcuopt_unsuffixed libcuopt==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -433,18 +433,18 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - libcuopt-cu12==25.12.*,>=0.0.0a0
+              - libcuopt-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - libcuopt-cu13==25.12.*,>=0.0.0a0
+              - libcuopt-cu13==25.12.*
           - {matrix: null, packages: [*libcuopt_unsuffixed]}
   depends_on_cuopt:
     common:
       - output_types: conda
         packages:
-          - &cuopt_unsuffixed cuopt==25.12.*,>=0.0.0a0
+          - &cuopt_unsuffixed cuopt==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -457,18 +457,18 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - cuopt-cu12==25.12.*,>=0.0.0a0
+              - cuopt-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - cuopt-cu13==25.12.*,>=0.0.0a0
+              - cuopt-cu13==25.12.*
           - {matrix: null, packages: [*cuopt_unsuffixed]}
   depends_on_cuopt_server:
     common:
       - output_types: conda
         packages:
-          - &cuopt_server_unsuffixed cuopt-server==25.12.*,>=0.0.0a0
+          - &cuopt_server_unsuffixed cuopt-server==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -481,18 +481,18 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - cuopt-server-cu12==25.12.*,>=0.0.0a0
+              - cuopt-server-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - cuopt-server-cu13==25.12.*,>=0.0.0a0
+              - cuopt-server-cu13==25.12.*
           - {matrix: null, packages: [*cuopt_server_unsuffixed]}
   depends_on_cuopt_sh_client:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - &cuopt_sh_client_unsuffixed cuopt-sh-client==25.12.*,>=0.0.0a0
+          - &cuopt_sh_client_unsuffixed cuopt-sh-client==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -502,7 +502,7 @@ dependencies:
     common:
       - output_types: [requirements, pyproject, conda]
         packages:
-          - cuopt-mps-parser==25.12.*,>=0.0.0a0
+          - cuopt-mps-parser==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -512,12 +512,12 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - libraft-headers==25.12.*,>=0.0.0a0
+          - libraft-headers==25.12.*
   depends_on_librmm:
     common:
       - output_types: conda
         packages:
-          - &librmm_unsuffixed librmm==25.12.*,>=0.0.0a0
+          - &librmm_unsuffixed librmm==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -530,12 +530,12 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - librmm-cu12==25.12.*,>=0.0.0a0
+              - librmm-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - librmm-cu13==25.12.*,>=0.0.0a0
+              - librmm-cu13==25.12.*
           - {matrix: null, packages: [*librmm_unsuffixed]}
   depends_on_cupy:
     common:
@@ -560,7 +560,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - rapids-logger==0.2.*,>=0.0.0a0
+          - rapids-logger==0.2.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -570,7 +570,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &rmm_unsuffixed rmm==25.12.*,>=0.0.0a0
+          - &rmm_unsuffixed rmm==25.12.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -583,12 +583,12 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - rmm-cu12==25.12.*,>=0.0.0a0
+              - rmm-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - rmm-cu13==25.12.*,>=0.0.0a0
+              - rmm-cu13==25.12.*
           - matrix:
             packages:
               - *rmm_unsuffixed
@@ -597,7 +597,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &cudf_unsuffixed cudf==25.12.*,>=0.0.0a0
+          - &cudf_unsuffixed cudf==25.12.*
       - output_types: requirements
         packages:
           - --extra-index-url=https://pypi.nvidia.com
@@ -609,12 +609,12 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - cudf-cu12==25.12.*,>=0.0.0a0
+              - cudf-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - cudf-cu13==25.12.*,>=0.0.0a0
+              - cudf-cu13==25.12.*
           - matrix:
             packages:
               - *cudf_unsuffixed
@@ -623,7 +623,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &pylibraft_unsuffixed pylibraft==25.12.*,>=0.0.0a0
+          - &pylibraft_unsuffixed pylibraft==25.12.*
       - output_types: requirements
         packages:
           - --extra-index-url=https://pypi.nvidia.com
@@ -635,12 +635,12 @@ dependencies:
               cuda: "12.*"
               cuda_suffixed: "true"
             packages:
-              - pylibraft-cu12==25.12.*,>=0.0.0a0
+              - pylibraft-cu12==25.12.*
           - matrix:
               cuda: "13.*"
               cuda_suffixed: "true"
             packages:
-              - pylibraft-cu13==25.12.*,>=0.0.0a0
+              - pylibraft-cu13==25.12.*
           - matrix:
             packages:
               - *pylibraft_unsuffixed
diff --git a/python/cuopt/cuopt/linear_programming/pyproject.toml b/python/cuopt/cuopt/linear_programming/pyproject.toml
index c11cd3a58..24907d83f 100644
--- a/python/cuopt/cuopt/linear_programming/pyproject.toml
+++ b/python/cuopt/cuopt/linear_programming/pyproject.toml
@@ -20,7 +20,7 @@ license = { text = "Apache-2.0" }
 requires-python = ">=3.10"
 dependencies = [
     "numpy>=1.23.5,<3.0a0",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
     "Intended Audience :: Developers",
@@ -39,7 +39,7 @@ Source = "https://github.com/nvidia/cuopt"
 test = [
     "pytest-cov",
     "pytest<8",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [tool.setuptools]
@@ -83,5 +83,5 @@ requires = [
     "cython>=3.0.3",
     "ninja",
     "numpy>=1.23.5,<3.0a0",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml
index f3f2dbbf8..f27e3f23a 100644
--- a/python/cuopt/pyproject.toml
+++ b/python/cuopt/pyproject.toml
@@ -20,18 +20,18 @@ license = { text = "Apache-2.0" }
 requires-python = ">=3.10"
 dependencies = [
     "cuda-python>=13.0.1,<14.0a0",
-    "cudf==25.12.*,>=0.0.0a0",
-    "cuopt-mps-parser==25.12.*,>=0.0.0a0",
+    "cudf==25.12.*",
+    "cuopt-mps-parser==25.12.*",
     "cupy-cuda13x>=13.6.0",
-    "libcuopt==25.12.*,>=0.0.0a0",
+    "libcuopt==25.12.*",
     "numba-cuda>=0.19.1,<0.20.0a0",
     "numba>=0.60.0",
     "numpy>=1.23.5,<3.0a0",
     "pandas>=2.0",
-    "pylibraft==25.12.*,>=0.0.0a0",
-    "rapids-dask-dependency==25.12.*,>=0.0.0a0",
-    "rapids-logger==0.2.*,>=0.0.0a0",
-    "rmm==25.12.*,>=0.0.0a0",
+    "pylibraft==25.12.*",
+    "rapids-dask-dependency==25.12.*",
+    "rapids-logger==0.2.*",
+    "rmm==25.12.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
     "Intended Audience :: Developers",
@@ -48,7 +48,7 @@ test = [
     "numpy>=1.23.5,<3.0a0",
     "pytest-cov",
     "pytest<8",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -112,12 +112,12 @@ dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
     "cmake>=3.30.4",
-    "cuopt-mps-parser==25.12.*,>=0.0.0a0",
+    "cuopt-mps-parser==25.12.*",
     "cupy-cuda13x>=13.6.0",
     "cython>=3.0.3",
-    "libcuopt==25.12.*,>=0.0.0a0",
+    "libcuopt==25.12.*",
     "ninja",
-    "pylibraft==25.12.*,>=0.0.0a0",
-    "rapids-logger==0.2.*,>=0.0.0a0",
-    "rmm==25.12.*,>=0.0.0a0",
+    "pylibraft==25.12.*",
+    "rapids-logger==0.2.*",
+    "rmm==25.12.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml
index 5b94fb74f..79b2de079 100644
--- a/python/cuopt_self_hosted/pyproject.toml
+++ b/python/cuopt_self_hosted/pyproject.toml
@@ -19,7 +19,7 @@ authors = [
 license = { text = "Apache-2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "cuopt-mps-parser==25.12.*,>=0.0.0a0",
+    "cuopt-mps-parser==25.12.*",
     "msgpack-numpy==0.4.8",
     "msgpack==1.1.0",
     "requests",
diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml
index 4e4dc15a2..cbf9429c7 100644
--- a/python/cuopt_server/pyproject.toml
+++ b/python/cuopt_server/pyproject.toml
@@ -20,7 +20,7 @@ authors = [
 license = { text = "Apache-2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "cuopt==25.12.*,>=0.0.0a0",
+    "cuopt==25.12.*",
     "cupy-cuda13x>=13.6.0",
     "fastapi",
     "jsonref==1.1.0",
diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml
index 1fdd8ad2c..5e955f3ba 100644
--- a/python/libcuopt/pyproject.toml
+++ b/python/libcuopt/pyproject.toml
@@ -31,8 +31,8 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "cuopt-mps-parser==25.12.*,>=0.0.0a0",
-    "librmm==25.12.*,>=0.0.0a0",
+    "cuopt-mps-parser==25.12.*",
+    "librmm==25.12.*",
     "nvidia-cublas",
     "nvidia-cudart",
     "nvidia-cudss",
@@ -40,7 +40,7 @@ dependencies = [
     "nvidia-cusolver",
     "nvidia-cusparse",
     "nvidia-nvtx",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -82,8 +82,8 @@ dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
     "cmake>=3.30.4",
-    "cuopt-mps-parser==25.12.*,>=0.0.0a0",
-    "librmm==25.12.*,>=0.0.0a0",
+    "cuopt-mps-parser==25.12.*",
+    "librmm==25.12.*",
     "ninja",
-    "rapids-logger==0.2.*,>=0.0.0a0",
+    "rapids-logger==0.2.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

From f341e345d2a11736b18c2c787e70ed6d494e4b95 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Thu, 11 Dec 2025 18:52:50 -0800
Subject: [PATCH 13/27] Fix incorrect infeasible list

---
 cpp/src/dual_simplex/basis_solves.cpp  | 14 +++++++-
 cpp/src/dual_simplex/basis_solves.hpp  |  2 ++
 cpp/src/dual_simplex/basis_updates.cpp |  4 ++-
 cpp/src/dual_simplex/basis_updates.hpp |  2 ++
 cpp/src/dual_simplex/crossover.cpp     |  6 ++--
 cpp/src/dual_simplex/phase2.cpp        | 45 ++++++++++++++------------
 cpp/src/dual_simplex/primal.cpp        |  2 +-
 7 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp
index db24f55a2..3080f269d 100644
--- a/cpp/src/dual_simplex/basis_solves.cpp
+++ b/cpp/src/dual_simplex/basis_solves.cpp
@@ -613,6 +613,8 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
 template <typename i_t, typename f_t>
 i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                 const std::vector<f_t>& lower,
+                 const std::vector<f_t>& upper,
                  const std::vector<i_t>& deficient,
                  const std::vector<i_t>& slacks_needed,
                  std::vector<i_t>& basis_list,
@@ -658,7 +660,15 @@ i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
     nonbasic_list[nonbasic_map[replace_j]] = bad_j;
     vstatus[replace_j]                     = variable_status_t::BASIC;
     // This is the main issue. What value should bad_j take on.
-    vstatus[bad_j] = variable_status_t::NONBASIC_FREE;
+    if (lower[bad_j] == -inf && upper[bad_j] == inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_FREE;
+    } else if (lower[bad_j] > -inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_LOWER;
+    } else if (upper[bad_j] < inf) {
+      vstatus[bad_j] = variable_status_t::NONBASIC_UPPER;
+    } else {
+      assert(1 == 0);
+    }
   }
 
   return 0;
@@ -849,6 +859,8 @@ template int factorize_basis<int>(const csc_matrix_t<int, double>& A,
 
 template int basis_repair<int, double>(const csc_matrix_t<int, double>& A,
                                        const simplex_solver_settings_t<int, double>& settings,
+                                       const std::vector<double>& lower,
+                                       const std::vector<double>& upper,
                                        const std::vector<int>& deficient,
                                        const std::vector<int>& slacks_needed,
                                        std::vector<int>& basis_list,
diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp
index b668c0f46..0745806a6 100644
--- a/cpp/src/dual_simplex/basis_solves.hpp
+++ b/cpp/src/dual_simplex/basis_solves.hpp
@@ -42,6 +42,8 @@ i_t factorize_basis(const csc_matrix_t<i_t, f_t>& A,
 template <typename i_t, typename f_t>
 i_t basis_repair(const csc_matrix_t<i_t, f_t>& A,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                 const std::vector<f_t>& lower,
+                 const std::vector<f_t>& upper,
                  const std::vector<i_t>& deficient,
                  const std::vector<i_t>& slacks_needed,
                  std::vector<i_t>& basis_list,
diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 6b79f3c86..11056a65e 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -2046,6 +2046,8 @@ template <typename i_t, typename f_t>
 int basis_update_mpf_t<i_t, f_t>::refactor_basis(
   const csc_matrix_t<i_t, f_t>& A,
   const simplex_solver_settings_t<i_t, f_t>& settings,
+  const std::vector<f_t>& lower,
+  const std::vector<f_t>& upper,
   std::vector<i_t>& basic_list,
   std::vector<i_t>& nonbasic_list,
   std::vector<variable_status_t>& vstatus)
@@ -2066,7 +2068,7 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
                       deficient,
                       slacks_needed) == -1) {
     settings.log.debug("Initial factorization failed\n");
-    basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
 
 #ifdef CHECK_BASIS_REPAIR
     const i_t m = A.m;
diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp
index cea907074..9b5d3e614 100644
--- a/cpp/src/dual_simplex/basis_updates.hpp
+++ b/cpp/src/dual_simplex/basis_updates.hpp
@@ -373,6 +373,8 @@ class basis_update_mpf_t {
   // Compute L*U = A(p, basic_list)
   int refactor_basis(const csc_matrix_t<i_t, f_t>& A,
                      const simplex_solver_settings_t<i_t, f_t>& settings,
+                     const std::vector<f_t>& lower,
+                     const std::vector<f_t>& upper,
                      std::vector<i_t>& basic_list,
                      std::vector<i_t>& nonbasic_list,
                      std::vector<variable_status_t>& vstatus);
diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp
index 23d9a0e8e..3dd61b152 100644
--- a/cpp/src/dual_simplex/crossover.cpp
+++ b/cpp/src/dual_simplex/crossover.cpp
@@ -786,7 +786,7 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
         if (rank != m) {
           settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
           basis_repair(
-            lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+            lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
           if (factorize_basis(
                 lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
             settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1132,7 +1132,7 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
   rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1323,7 +1323,7 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
         factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
       if (rank != m) {
         settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-        basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+        basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
         if (factorize_basis(
               lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
           settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 56298ef4d..e0ac7239e 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -623,14 +623,17 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t<i_t, f_t>& lp,
                                            const std::vector<i_t>& basic_list,
                                            const std::vector<f_t>& x,
                                            std::vector<f_t>& squared_infeasibilities,
-                                           std::vector<i_t>& infeasibility_indices)
+                                           std::vector<i_t>& infeasibility_indices,
+                                           f_t& primal_inf)
 {
   const i_t m = lp.num_rows;
   const i_t n = lp.num_cols;
-  squared_infeasibilities.resize(n, 0.0);
+  squared_infeasibilities.resize(n);
+  std::fill(squared_infeasibilities.begin(), squared_infeasibilities.end(), 0.0);
   infeasibility_indices.reserve(n);
   infeasibility_indices.clear();
-  f_t primal_inf = 0.0;
+  f_t primal_inf_squared = 0.0;
+  primal_inf = 0.0;
   for (i_t k = 0; k < m; ++k) {
     const i_t j            = basic_list[k];
     const f_t lower_infeas = lp.lower[j] - x[j];
@@ -640,10 +643,11 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t<i_t, f_t>& lp,
       const f_t square_infeas    = infeas * infeas;
       squared_infeasibilities[j] = square_infeas;
       infeasibility_indices.push_back(j);
-      primal_inf += square_infeas;
+      primal_inf_squared += square_infeas;
+      primal_inf += infeas;
     }
   }
-  return primal_inf;
+  return primal_inf_squared;
 }
 
 template <typename i_t, typename f_t>
@@ -2241,7 +2245,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     assert(superbasic_list.size() == 0);
     assert(nonbasic_list.size() == n - m);
 
-    if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) {
+    if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
       return dual::status_t::NUMERICAL;
     }
 
@@ -2268,7 +2272,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
 #ifdef COMPUTE_DUAL_RESIDUAL
   std::vector<f_t> dual_res1;
-  compute_dual_residual(lp.A, objective, y, z, dual_res1);
+  phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1);
   f_t dual_res_norm = vector_norm_inf<i_t, f_t>(dual_res1);
   if (dual_res_norm > settings.tight_tol) {
     settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm);
@@ -2357,8 +2361,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
   std::vector<uint8_t> bounded_variables(n, 0);
   phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables);
 
-  f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-    lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+  f_t primal_infeasibility;
+  f_t primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
+    lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
 
 #ifdef CHECK_BASIC_INFEASIBILITIES
   phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0);
@@ -2557,8 +2562,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
             x                    = unperturbed_x;
-            primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+            primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
+              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
             settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility);
 
             objective = lp.objective;
@@ -2594,8 +2599,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
             x                    = unperturbed_x;
-            primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+            primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
+              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
 
             const f_t orig_dual_infeas = phase2::dual_infeasibility(
               lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol);
@@ -2810,7 +2815,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                           delta_xB_0_sparse.i,
                                           squared_infeasibilities,
                                           infeasibility_indices,
-                                          primal_infeasibility);
+                                          primal_infeasibility_squared);
     // Update primal infeasibilities due to changes in basic variables
     // from the leaving and entering variables
     phase2::update_primal_infeasibilities(lp,
@@ -2822,7 +2827,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                                           scaled_delta_xB_sparse.i,
                                           squared_infeasibilities,
                                           infeasibility_indices,
-                                          primal_infeasibility);
+                                          primal_infeasibility_squared);
     // Update the entering variable
     phase2::update_single_primal_infeasibility(lp.lower,
                                                lp.upper,
@@ -2883,14 +2888,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 #endif
     if (should_refactor) {
       bool should_recompute_x = false;
-      if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) {
+      if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
         should_recompute_x = true;
         settings.log.printf("Failed to factorize basis. Iteration %d\n", iter);
         if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; }
         i_t count = 0;
         i_t deficient_size;
         while ((deficient_size =
-                  ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus)) > 0) {
+                  ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) {
           settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n",
                               iter,
                               static_cast<int>(deficient_size));
@@ -2912,8 +2917,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
           lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
         x = unperturbed_x;
       }
-      phase2::compute_initial_primal_infeasibilities(
-        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+      primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
+        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
     }
 #ifdef CHECK_BASIC_INFEASIBILITIES
     phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7);
@@ -2951,7 +2956,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
                           iter,
                           compute_user_objective(lp, obj),
                           infeasibility_indices.size(),
-                          primal_infeasibility,
+                          primal_infeasibility_squared,
                           sum_perturb,
                           now);
     }
diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp
index 80406dcf0..445177fac 100644
--- a/cpp/src/dual_simplex/primal.cpp
+++ b/cpp/src/dual_simplex/primal.cpp
@@ -298,7 +298,7 @@ primal::status_t primal_phase2(i_t phase,
     factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);

From b8e99593006c50e1be680a2f3f03b1128a31ee8a Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 12 Dec 2025 15:08:47 -0800
Subject: [PATCH 14/27] Style fixes

---
 cpp/src/dual_simplex/basis_updates.cpp |  3 +-
 cpp/src/dual_simplex/crossover.cpp     | 31 ++++++++++++--
 cpp/src/dual_simplex/phase2.cpp        | 56 +++++++++++++++++++-------
 cpp/src/dual_simplex/primal.cpp        | 10 ++++-
 4 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index 11056a65e..e44e3b21c 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -2068,7 +2068,8 @@ int basis_update_mpf_t<i_t, f_t>::refactor_basis(
                       deficient,
                       slacks_needed) == -1) {
     settings.log.debug("Initial factorization failed\n");
-    basis_repair(A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(
+      A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
 
 #ifdef CHECK_BASIS_REPAIR
     const i_t m = A.m;
diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp
index 3dd61b152..41844729e 100644
--- a/cpp/src/dual_simplex/crossover.cpp
+++ b/cpp/src/dual_simplex/crossover.cpp
@@ -785,8 +785,15 @@ i_t primal_push(const lp_problem_t<i_t, f_t>& lp,
           factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
         if (rank != m) {
           settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-          basis_repair(
-            lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+          basis_repair(lp.A,
+                       settings,
+                       lp.lower,
+                       lp.upper,
+                       deficient,
+                       slacks_needed,
+                       basic_list,
+                       nonbasic_list,
+                       vstatus);
           if (factorize_basis(
                 lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
             settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1132,7 +1139,15 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
   rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A,
+                 settings,
+                 lp.lower,
+                 lp.upper,
+                 deficient,
+                 slacks_needed,
+                 basic_list,
+                 nonbasic_list,
+                 vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
@@ -1323,7 +1338,15 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
         factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
       if (rank != m) {
         settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-        basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+        basis_repair(lp.A,
+                     settings,
+                     lp.lower,
+                     lp.upper,
+                     deficient,
+                     slacks_needed,
+                     basic_list,
+                     nonbasic_list,
+                     vstatus);
         if (factorize_basis(
               lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) {
           settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index e0ac7239e..3aeef35e1 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -633,7 +633,7 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t<i_t, f_t>& lp,
   infeasibility_indices.reserve(n);
   infeasibility_indices.clear();
   f_t primal_inf_squared = 0.0;
-  primal_inf = 0.0;
+  primal_inf             = 0.0;
   for (i_t k = 0; k < m; ++k) {
     const i_t j            = basic_list[k];
     const f_t lower_infeas = lp.lower[j] - x[j];
@@ -2245,7 +2245,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
     assert(superbasic_list.size() == 0);
     assert(nonbasic_list.size() == n - m);
 
-    if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
+    if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) >
+        0) {
       return dual::status_t::NUMERICAL;
     }
 
@@ -2362,8 +2363,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
   phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables);
 
   f_t primal_infeasibility;
-  f_t primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
-    lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
+  f_t primal_infeasibility_squared =
+    phase2::compute_initial_primal_infeasibilities(lp,
+                                                   settings,
+                                                   basic_list,
+                                                   x,
+                                                   squared_infeasibilities,
+                                                   infeasibility_indices,
+                                                   primal_infeasibility);
 
 #ifdef CHECK_BASIC_INFEASIBILITIES
   phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0);
@@ -2561,9 +2568,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             std::vector<f_t> unperturbed_x(n);
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
-            x                    = unperturbed_x;
-            primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
+            x = unperturbed_x;
+            primal_infeasibility_squared =
+              phase2::compute_initial_primal_infeasibilities(lp,
+                                                             settings,
+                                                             basic_list,
+                                                             x,
+                                                             squared_infeasibilities,
+                                                             infeasibility_indices,
+                                                             primal_infeasibility);
             settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility);
 
             objective = lp.objective;
@@ -2598,9 +2611,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             std::vector<f_t> unperturbed_x(n);
             phase2::compute_primal_solution_from_basis(
               lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
-            x                    = unperturbed_x;
-            primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
-              lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
+            x = unperturbed_x;
+            primal_infeasibility_squared =
+              phase2::compute_initial_primal_infeasibilities(lp,
+                                                             settings,
+                                                             basic_list,
+                                                             x,
+                                                             squared_infeasibilities,
+                                                             infeasibility_indices,
+                                                             primal_infeasibility);
 
             const f_t orig_dual_infeas = phase2::dual_infeasibility(
               lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol);
@@ -2888,14 +2907,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 #endif
     if (should_refactor) {
       bool should_recompute_x = false;
-      if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
+      if (ft.refactor_basis(
+            lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) {
         should_recompute_x = true;
         settings.log.printf("Failed to factorize basis. Iteration %d\n", iter);
         if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; }
         i_t count = 0;
         i_t deficient_size;
-        while ((deficient_size =
-                  ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) {
+        while ((deficient_size = ft.refactor_basis(
+                  lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) {
           settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n",
                               iter,
                               static_cast<int>(deficient_size));
@@ -2917,8 +2937,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
           lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x);
         x = unperturbed_x;
       }
-      primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities(
-        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility);
+      primal_infeasibility_squared =
+        phase2::compute_initial_primal_infeasibilities(lp,
+                                                       settings,
+                                                       basic_list,
+                                                       x,
+                                                       squared_infeasibilities,
+                                                       infeasibility_indices,
+                                                       primal_infeasibility);
     }
 #ifdef CHECK_BASIC_INFEASIBILITIES
     phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7);
diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp
index 445177fac..3d9849fbe 100644
--- a/cpp/src/dual_simplex/primal.cpp
+++ b/cpp/src/dual_simplex/primal.cpp
@@ -298,7 +298,15 @@ primal::status_t primal_phase2(i_t phase,
     factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed);
   if (rank != m) {
     settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m);
-    basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus);
+    basis_repair(lp.A,
+                 settings,
+                 lp.lower,
+                 lp.upper,
+                 deficient,
+                 slacks_needed,
+                 basic_list,
+                 nonbasic_list,
+                 vstatus);
     if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) ==
         -1) {
       settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m);

From 3c3683668e9e031003c95f399aa99ff34fd8b218 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Wed, 17 Dec 2025 17:16:39 -0800
Subject: [PATCH 15/27] Separate out cuts logic into several classes

---
 cpp/src/dual_simplex/CMakeLists.txt         |    3 +-
 cpp/src/dual_simplex/branch_and_bound.cpp   |  623 ++---------
 cpp/src/dual_simplex/branch_and_bound.hpp   |    3 +
 cpp/src/dual_simplex/cuts.cpp               | 1050 +++++++++++++++++++
 cpp/src/dual_simplex/cuts.hpp               |  221 ++++
 cpp/src/dual_simplex/solve.cpp              |  219 ----
 cpp/src/dual_simplex/solve.hpp              |   13 -
 cpp/src/dual_simplex/sparse_matrix.cpp      |    2 +-
 cpp/src/mip/diversity/diversity_manager.cu  |    2 +-
 cpp/src/mip/diversity/lns/rins.cu           |    1 +
 cpp/src/mip/solver.cu                       |    4 +-
 cpp/tests/dual_simplex/unit_tests/solve.cpp |   10 +-
 12 files changed, 1396 insertions(+), 755 deletions(-)
 create mode 100644 cpp/src/dual_simplex/cuts.cpp
 create mode 100644 cpp/src/dual_simplex/cuts.hpp

diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt
index a376ee23d..157a00a07 100644
--- a/cpp/src/dual_simplex/CMakeLists.txt
+++ b/cpp/src/dual_simplex/CMakeLists.txt
@@ -10,6 +10,7 @@ set(DUAL_SIMPLEX_SRC_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/cuts.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/folding.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp
@@ -33,7 +34,7 @@ set(DUAL_SIMPLEX_SRC_FILES
   )
 
 # Uncomment to enable debug info
-#set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1")
+set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1")
 
 set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES}
   ${DUAL_SIMPLEX_SRC_FILES} PARENT_SCOPE)
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index ae53d1ba3..945d84215 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -5,10 +5,9 @@
  */
 /* clang-format on */
 
-#include <omp.h>
-#include <algorithm>
 #include <dual_simplex/branch_and_bound.hpp>
 #include <dual_simplex/basis_solves.hpp>
+#include <dual_simplex/cuts.hpp>
 #include <dual_simplex/initial_basis.hpp>
 #include <dual_simplex/logger.hpp>
 #include <dual_simplex/mip_node.hpp>
@@ -20,6 +19,9 @@
 #include <dual_simplex/tic_toc.hpp>
 #include <dual_simplex/user_problem.hpp>
 
+#include <omp.h>
+
+#include <algorithm>
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
@@ -255,6 +257,7 @@ i_t branch_and_bound_t<i_t, f_t>::get_heap_size()
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solution)
 {
+  mutex_original_lp_.lock();
   if (solution.size() != original_problem_.num_cols) {
     settings_.log.printf(
       "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols);
@@ -263,16 +266,22 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
   crush_primal_solution<i_t, f_t>(
     original_problem_, original_lp_, solution, new_slacks_, crushed_solution);
   f_t obj             = compute_objective(original_lp_, crushed_solution);
+  mutex_original_lp_.unlock();
   bool is_feasible    = false;
   bool attempt_repair = false;
   mutex_upper_.lock();
-  if (obj < upper_bound_) {
+  f_t current_upper_bound = upper_bound_;
+  mutex_upper_.unlock();
+  if (obj < current_upper_bound) {
     f_t primal_err;
     f_t bound_err;
     i_t num_fractional;
+    mutex_original_lp_.lock();
     is_feasible = check_guess(
       original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional);
-    if (is_feasible) {
+    mutex_original_lp_.unlock();
+    mutex_upper_.lock();
+    if (is_feasible && obj < upper_bound_) {
       upper_bound_ = obj;
       incumbent_.set_incumbent_solution(obj, crushed_solution);
     } else {
@@ -287,8 +296,8 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
           num_fractional);
       }
     }
+    mutex_upper_.unlock();
   }
-  mutex_upper_.unlock();
 
   if (is_feasible) {
     if (status_ == mip_exploration_status_t::RUNNING) {
@@ -297,7 +306,7 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
       std::string gap = user_mip_gap<f_t>(user_obj, user_lower);
 
       settings_.log.printf(
-        "H                           %+13.6e    %+10.6e                        %s %9.2f\n",
+        "H                                %+13.6e    %+10.6e                        %s %9.2f\n",
         user_obj,
         user_lower,
         gap.c_str(),
@@ -410,7 +419,7 @@ void branch_and_bound_t<i_t, f_t>::repair_heuristic_solutions()
           std::string user_gap = user_mip_gap<f_t>(obj, lower);
 
           settings_.log.printf(
-            "H                        %+13.6e  %+10.6e                      %s %9.2f\n",
+            "H                           %+13.6e    %+10.6e                              %s %9.2f\n",
             obj,
             lower,
             user_gap.c_str(),
@@ -1136,6 +1145,9 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       "|  Time  |\n");
   }
 
+  cut_pool_t<i_t, f_t> cut_pool(original_lp_.num_cols, settings_);
+  cut_generation_t<i_t, f_t> cut_generation(cut_pool);
+
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
 #ifdef PRINT_SOLUTION
@@ -1173,538 +1185,117 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]);
       }
 #endif
-      // Let's look for Gomory cuts
-      // Compute b_bar
-      std::vector<f_t> b_bar(original_lp_.num_rows);
-      basis_update.b_solve(original_lp_.rhs, b_bar);
-
-      std::vector<f_t> nonbasic_mark(original_lp_.num_cols, 0);
-      for (i_t j : nonbasic_list) {
-        if (j < 0 || j >= original_lp_.num_cols) {
-          settings_.log.printf("nonbasic_list out of bounds %d num_cols %d\n", j, original_lp_.num_cols);
-          exit(1);
-        }
-        nonbasic_mark[j] = 1;
-      }
-
-      std::vector<f_t> x_workspace(original_lp_.num_cols, 0.0);
-      std::vector<i_t> x_mark(original_lp_.num_cols, 0);
-
-      std::vector<i_t> abar_indices;
-      abar_indices.reserve(original_lp_.num_cols);
-
-      std::vector<i_t> has_lower(original_lp_.num_cols, 0);
-      std::vector<i_t> has_upper(original_lp_.num_cols, 0);
 
-      bool needs_complement = false;
-      for (i_t j = 0; j < original_lp_.num_cols; j++) {
-        if (original_lp_.lower[j] < 0) {
-          settings_.log.printf(
-            "Variable %d has negative lower bound %e\n", j, original_lp_.lower[j]);
-          exit(1);
-        }
-        const f_t uj      = original_lp_.upper[j];
-        const f_t lj      = original_lp_.lower[j];
-        if (uj != inf || lj != 0.0) {
-          needs_complement = true;
-        }
-        const f_t xstar_j = root_relax_soln_.x[j];
-        if (uj < inf) {
-          if (uj - xstar_j <= xstar_j - lj) {
-            has_upper[j] = 1;
-          } else {
-            has_lower[j] = 1;
-          }
-          continue;
-        }
+      // Generate cuts and add them to the cut pool
+      cut_generation.generate_cuts(original_lp_, settings_, Arow, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
 
-        if (lj > -inf) {
-          has_lower[j] = 1;
-        }
-      }
-#ifdef PRINT_COMPLEMENT_INFO
-      settings_.log.printf("needs_complement %d\n", needs_complement);
-#endif
-
-      csr_matrix_t<i_t, f_t> C(0, original_lp_.num_cols, 0);
-      C.row_start[0] = 0;
+      // Score the cuts
+      cut_pool.score_cuts(root_relax_soln_.x);
+      // Get the best cuts from the cut pool
+      csr_matrix_t<i_t, f_t> cuts_to_add(0, original_lp_.num_cols, 0);
       std::vector<f_t> cut_rhs;
+      i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs);
 
-      for (i_t i = 0; i < original_lp_.num_rows; i++) {
-        const i_t j = basic_list[i];
-        //settings_.log.printf(
-        //  "Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]);
-        if (var_types_[j] != variable_type_t::INTEGER) { continue; }
-        const f_t x_j = root_relax_soln_.x[j];
-        if (std::abs(x_j - std::round(x_j)) < settings_.integer_tol) { continue; }
-#ifdef PRINT_CUT_INFO
-        settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
-#endif
-#ifdef PRINT_BASIS
-        for (i_t h = 0; h < basic_list.size(); h++) {
-          settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
-        }
-#endif
-
-        // Solve B^T u_bar = e_i
-        sparse_vector_t<i_t, f_t> e_i(original_lp_.num_rows, 1);
-        e_i.i[0] = i;
-        e_i.x[0] = 1.0;
-        sparse_vector_t<i_t, f_t> u_bar(original_lp_.num_rows, 0);
-        basis_update.b_transpose_solve(e_i, u_bar);
-
-        std::vector<f_t> u_bar_dense(original_lp_.num_rows);
-        u_bar.to_dense(u_bar_dense);
-
-        std::vector<f_t> BTu_bar(original_lp_.num_rows);
-        b_transpose_multiply(original_lp_, basic_list, u_bar_dense, BTu_bar);
-        for (i_t k = 0; k < original_lp_.num_rows; k++) {
-          if (k == i) {
-            if (std::abs(BTu_bar[k] - 1.0) > 1e-6) {
-              settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
-              exit(1);
-            }
-          } else {
-            if (std::abs(BTu_bar[k]) > 1e-6) {
-              settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
-              exit(1);
-            }
-          }
-        }
-
-        // Compute a_bar = N^T u_bar
-        // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused.
-        const i_t nz_ubar = u_bar.i.size();
-        for (i_t k = 0; k < nz_ubar; k++) {
-          const i_t ii        = u_bar.i[k];
-          const f_t u_bar_i   = u_bar.x[k];
-          const i_t row_start = Arow.col_start[ii];
-          const i_t row_end   = Arow.col_start[ii + 1];
-          for (i_t p = row_start; p < row_end; p++) {
-            const i_t jj = Arow.i[p];
-            if (nonbasic_mark[jj] == 1) {
-              x_workspace[jj] += u_bar_i * Arow.x[p];
-              if (!x_mark[jj]) {
-                x_mark[jj] = 1;
-                abar_indices.push_back(jj);
-              }
-            }
-          }
-        }
-
-        sparse_vector_t<i_t, f_t> a_bar(original_lp_.num_cols, abar_indices.size() + 1);
-        for (i_t k = 0; k < abar_indices.size(); k++) {
-          const i_t jj = abar_indices[k];
-          a_bar.i[k]   = jj;
-          a_bar.x[k]   = x_workspace[jj];
-        }
-
-        // Clear the workspace
-        for (i_t jj : abar_indices) {
-          x_workspace[jj] = 0.0;
-          x_mark[jj]      = 0;
-        }
-        abar_indices.clear();
-
-        // We should now have the base inequality
-        // x_j + a_bar^T x_N >= b_bar_i
-        // We add x_j into a_bar so that everything is in a single sparse_vector_t
-        a_bar.i[a_bar.i.size() - 1] = j;
-        a_bar.x[a_bar.x.size() - 1] = 1.0;
-
-        std::vector<f_t> a_bar_dense(original_lp_.num_cols);
-        a_bar.to_dense(a_bar_dense);
-
-        f_t a_bar_dense_dot = dot<i_t, f_t>(a_bar_dense, root_relax_soln_.x);
-        if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) {
-          settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]);
-          settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]);
-          exit(1);
-        }
-
-        // Skip cuts that are shallow
-        const f_t shallow_tol = 1e-2;
-        if (std::abs(x_j - std::round(x_j)) < shallow_tol) {
-          //settings_.log.printf("Skipping shallow cut %d. b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j);
-          continue;
-        }
-
-        const f_t f_val = b_bar[i] - std::floor(b_bar[i]);
-        if (f_val < 0.01 || f_val > 0.99) {
-          settings_.log.printf("Skipping cut %d. b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val);
-          continue;
-        }
-
-#ifdef PRINT_BASE_INEQUALITY
-        // Print out the base inequality
-        for (i_t k = 0; k < a_bar.i.size(); k++) {
-          const i_t jj = a_bar.i[k];
-          const f_t aj = a_bar.x[k];
-          settings_.log.printf("a_bar[%d] = %e\n", k, aj);
-        }
-        settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]);
-#endif
-
-        auto f = [](f_t q_1, f_t q_2) -> f_t {
-          f_t q_1_hat = q_1 - std::floor(q_1);
-          f_t q_2_hat = q_2 - std::floor(q_2);
-          return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
-        };
-
-        auto h = [](f_t q) -> f_t { return std::max(q, 0.0); };
-
-
-        std::vector<i_t> cut_indices;
-        cut_indices.reserve(a_bar.i.size());
-        f_t R;
-        if (!needs_complement) {
-          R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]);
-
-          for (i_t k = 0; k < a_bar.i.size(); k++) {
-            const i_t jj = a_bar.i[k];
-            f_t aj       = a_bar.x[k];
-            if (var_types_[jj] == variable_type_t::INTEGER) {
-              x_workspace[jj] += f(aj, b_bar[i]);
-              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
-                x_mark[jj] = 1;
-                cut_indices.push_back(jj);
-              }
-            } else {
-              x_workspace[jj] += h(aj);
-              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
-                x_mark[jj] = 1;
-                cut_indices.push_back(jj);
-              }
-            }
-          }
-        } else {
-          // Compute r
-          f_t r = b_bar[i];
-          for (i_t k = 0; k < a_bar.i.size(); k++) {
-            const i_t jj = a_bar.i[k];
-            if (has_upper[jj]) {
-              const f_t uj = original_lp_.upper[jj];
-              r -= uj * a_bar.x[k];
-              continue;
-            }
-            if (has_lower[jj]) {
-              const f_t lj = original_lp_.lower[jj];
-              r -= lj * a_bar.x[k];
-            }
-          }
-
-          // Compute R
-          R = std::ceil(r) * (r - std::floor(r));
-          for (i_t k = 0; k < a_bar.i.size(); k++) {
-            const i_t jj = a_bar.i[k];
-            const f_t aj = a_bar.x[k];
-            if (has_upper[jj]) {
-              const f_t uj = original_lp_.upper[jj];
-              if (var_types_[jj] == variable_type_t::INTEGER) {
-                R -= f(-aj, r) * uj;
-              } else {
-                R -= h(-aj) * uj;
-              }
-            } else if (has_lower[jj]) {
-              const f_t lj = original_lp_.lower[jj];
-              if (var_types_[jj] == variable_type_t::INTEGER) {
-                R += f(aj, r) * lj;
-              } else {
-                R += h(aj) * lj;
-              }
-            }
-          }
-
-          // Compute the cut coefficients
-          for (i_t k = 0; k < a_bar.i.size(); k++) {
-            const i_t jj = a_bar.i[k];
-            const f_t aj = a_bar.x[k];
-            if (has_upper[jj]) {
-              if (var_types_[jj] == variable_type_t::INTEGER) {
-                // Upper intersect I
-                x_workspace[jj] -= f(-aj, r);
-                if (!x_mark[jj] && x_workspace[jj] != 0.0) {
-                  x_mark[jj] = 1;
-                  cut_indices.push_back(jj);
-                }
-              } else {
-                // Upper intersect C
-                f_t h_j = h(-aj);
-                if (h_j != 0.0) {
-                  x_workspace[jj] -= h_j;
-                  if (!x_mark[jj]) {
-                    x_mark[jj] = 1;
-                    cut_indices.push_back(jj);
-                  }
-                }
-              }
-            } else if (var_types_[jj] == variable_type_t::INTEGER) {
-              // I \ Upper
-              x_workspace[jj] += f(aj, r);
-              if (!x_mark[jj] && x_workspace[jj] != 0.0) {
-                x_mark[jj] = 1;
-                cut_indices.push_back(jj);
-              }
-            } else {
-              // C \ Upper
-              f_t h_j = h(aj);
-              if (h_j != 0.0) {
-                x_workspace[jj] += h_j;
-                if (!x_mark[jj]) {
-                  x_mark[jj] = 1;
-                  cut_indices.push_back(jj);
-                }
-              }
-            }
-          }
-        }
-
-        sparse_vector_t<i_t, f_t> cut(original_lp_.num_cols, 0);
-        cut.i.reserve(cut_indices.size());
-        cut.x.reserve(cut_indices.size());
-        for (i_t k = 0; k < cut_indices.size(); k++) {
-          const i_t jj = cut_indices[k];
-
-          // Check for small coefficients
-          const f_t aj = x_workspace[jj];
-          if (std::abs(aj) < 1e-6) {
-            if (aj >= 0.0 && original_lp_.upper[jj] < inf) {
-              // Move this to the right-hand side
-              //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.upper[jj], jj);
-              R -= aj * original_lp_.upper[jj];
-              continue;
-            } else if (aj <= 0.0 && original_lp_.lower[jj] > -inf) {
-              //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.lower[jj], jj);
-              R += aj * original_lp_.lower[jj];
-              continue;
-            }
-            else {
-              //settings_.log.printf("Small coefficient %e for variable %d lower %e upper %e\n", aj, jj, original_lp_.lower[jj], original_lp_.upper[jj]);
-            }
-          }
-          cut.i.push_back(jj);
-          cut.x.push_back(x_workspace[jj]);
-        }
-
-        // Clear the workspace
-        for (i_t jj : cut_indices) {
-          x_workspace[jj] = 0.0;
-          x_mark[jj]      = 0;
-        }
-
-        if (cut.x.size() == 0)
-        {
-          continue;
-        }
-        if (cut.x.size() >= 0.7 * original_lp_.num_cols)
-       {
-          settings_.log.printf("Cut %d has %d nonzeros. Skipping because it is too dense %.2f\n", i, cut.x.size(), static_cast<f_t>(cut.x.size()) / original_lp_.num_cols);
-          continue;
-        }
-
-        // Sort the coefficients by their index
-        cut.sort();
-        // The new cut is: g'*x >= R
-        // But we want to have it in the form h'*x <= b
-        cut.negate();
-        C.append_row(cut);
-        cut_rhs.push_back(-R);
-      }
+      cuts_to_add.check_matrix();
 
-      csc_matrix_t<i_t, f_t> C_col(C.m, C.n, 0);
-      C.to_compressed_col(C_col);
-#ifdef PRINT_CUTS
-      C_col.print_matrix();
-#endif
-
-      C.check_matrix();
-#ifdef PRINT_CUT_RHS
-      for (i_t k = 0; k < cut_rhs.size(); k++) {
-        settings_.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]);
-      }
-#endif
-
-#ifdef PRINT_CUT_INFO
-      settings_.log.printf("C nz %d\n", C.row_start[C.m]);
-      settings_.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size());
-      settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
-#endif
-
-#ifdef PRINT_OPTIMAL
-      for (i_t j = 0; j < original_lp_.num_cols; j++) {
-        lp_settings.log.printf("x[%d] = %e\n", j, root_relax_soln_.x[j]);
-      }
-#endif
-
-      // Check to see that this is a cut i.e C*x > d
-      std::vector<f_t> Cx(C.m);
-      matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx);
-      f_t min_cut_violation = inf;
-      for (i_t k = 0; k < Cx.size(); k++) {
-        //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]);
-        if (Cx[k] <= cut_rhs[k]) {
-          settings_.log.printf("C*x <= d for cut %d\n", k);
-          exit(1);
-        }
-        min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]);
-      }
 #ifdef PRINT_MIN_CUT_VIOLATION
+      f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x);
       settings_.log.printf("Min cut violation %e\n", min_cut_violation);
 #endif
 
       // Resolve the LP with the new cuts
-      settings_.log.printf("Solving LP with %d cuts (%d nonzeros). Total constraints %d\n", C.m, C.row_start[C.m], C.m + original_lp_.num_rows);
+      settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+                           num_cuts,
+                           cuts_to_add.row_start[cuts_to_add.m],
+                           cut_pool.pool_size(),
+                           cuts_to_add.m + original_lp_.num_rows);
       lp_settings.log.log = false;
 
-      lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time,
-                                                              lp_settings,
-                                                              C,
-                                                              cut_rhs,
-                                                              original_lp_,
-                                                              root_relax_soln_,
-                                                              basis_update,
-                                                              basic_list,
-                                                              nonbasic_list,
-                                                              root_vstatus_,
-                                                              edge_norms_);
-      settings_.log.printf("Cut LP iterations %d. A nz %d\n", root_relax_soln_.iterations, original_lp_.A.col_start[original_lp_.A.n]);
-      stats_.total_lp_iters += root_relax_soln_.iterations;
-      root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
-
-      if (cut_status != lp_status_t::OPTIMAL) {
-        lp_settings.log.printf("Cut status %d\n", cut_status);
+      mutex_original_lp_.lock();
+      i_t add_cuts_status = add_cuts(settings_,
+                                     cuts_to_add,
+                                     cut_rhs,
+                                     original_lp_,
+                                     root_relax_soln_,
+                                     basis_update,
+                                     basic_list,
+                                     nonbasic_list,
+                                     root_vstatus_,
+                                     edge_norms_);
+      mutex_original_lp_.unlock();
+      if (add_cuts_status != 0) {
+        settings_.log.printf("Failed to add cuts\n");
         exit(1);
       }
 
-      original_lp_.A.transpose(Arow);
+      // Try to do bound strengthening
       var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS);
-      std::vector<i_t> cuts_to_remove;
-      cuts_to_remove.reserve(original_lp_.num_rows - original_rows);
-      std::vector<i_t> slacks_to_remove;
-      slacks_to_remove.reserve(original_lp_.num_rows - original_rows);
-      const f_t dual_tol = 1e-10;
-      for (i_t k = original_rows; k < original_lp_.num_rows; k++) {
-        if (std::abs(root_relax_soln_.y[k]) < dual_tol) {
-          const i_t row_start = Arow.col_start[k];
-          const i_t row_end   = Arow.col_start[k + 1];
-          i_t last_slack = -1;
-          const f_t slack_tol = 1e-3;
-          for (i_t p = row_start; p < row_end; p++) {
-            const i_t jj = Arow.i[p];
-            const i_t col_len = original_lp_.A.col_start[jj + 1] - original_lp_.A.col_start[jj];
-            if (var_types_[jj] == variable_type_t::CONTINUOUS &&
-                Arow.x[p] == 1.0 &&
-                original_lp_.lower[jj] == 0.0 &&
-                original_lp_.upper[jj] == inf &&
-                root_vstatus_[jj] == variable_status_t::BASIC &&
-                col_len == 1 &&
-                root_relax_soln_.x[jj] > slack_tol) {
-              last_slack = jj;
-            }
-          }
-          if (last_slack != -1) {
-            cuts_to_remove.push_back(k);
-            slacks_to_remove.push_back(last_slack);
-          }
-        }
-      }
 
-      if (cuts_to_remove.size() > 0) {
-        settings_.log.printf("Removing %d cuts\n", cuts_to_remove.size());
-        std::vector<i_t> marked_rows(original_lp_.num_rows, 0);
-        for (i_t i : cuts_to_remove) {
-          marked_rows[i] = 1;
-        }
-        std::vector<i_t> marked_cols(original_lp_.num_cols, 0);
-        for (i_t j : slacks_to_remove) {
-          marked_cols[j] = 1;
-        }
+      std::vector<bool> bounds_changed(original_lp_.num_cols, true);
+      std::vector<char> row_sense;
 
-        std::vector<f_t> new_rhs(original_lp_.num_rows - cuts_to_remove.size());
-        std::vector<f_t> new_solution_y(original_lp_.num_rows - cuts_to_remove.size());
-        i_t h = 0;
-        for (i_t i = 0; i < original_lp_.num_rows; i++) {
-          if (!marked_rows[i]) {
-            new_rhs[h] = original_lp_.rhs[i];
-            new_solution_y[h] = root_relax_soln_.y[i];
-            h++;
-          }
-        }
+      settings_.log.printf("Before A check\n");
+      original_lp_.A.check_matrix();
+      settings_.log.printf("Before A transpose\n");
+      original_lp_.A.transpose(Arow);
+      settings_.log.printf("After A transpose\n");
+      bool feasible =
+        bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed);
 
+      if (!feasible) {
+        settings_.log.printf("Bound strengthening failed\n");
+        exit(1);
+      }
 
-        Arow.remove_columns(marked_rows);
-        Arow.transpose(original_lp_.A);
-
-        std::vector<f_t> new_objective(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<f_t> new_lower(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<f_t> new_upper(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<variable_type_t> new_var_types(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<variable_status_t> new_vstatus(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<i_t> new_basic_list;
-        new_basic_list.reserve(original_lp_.num_rows - slacks_to_remove.size());
-        std::vector<i_t> new_nonbasic_list;
-        new_nonbasic_list.reserve(nonbasic_list.size());
-        std::vector<f_t> new_solution_x(original_lp_.num_cols - slacks_to_remove.size());
-        std::vector<f_t> new_solution_z(original_lp_.num_cols - slacks_to_remove.size());
-        h = 0;
-        for (i_t k = 0; k < original_lp_.num_cols; k++) {
-          if (!marked_cols[k]) {
-            new_objective[h] = original_lp_.objective[k];
-            new_lower[h] = original_lp_.lower[k];
-            new_upper[h] = original_lp_.upper[k];
-            new_var_types[h] = var_types_[k];
-            new_vstatus[h] = root_vstatus_[k];
-            new_solution_x[h] = root_relax_soln_.x[k];
-            new_solution_z[h] = root_relax_soln_.z[k];
-            if (new_vstatus[h] != variable_status_t::BASIC) {
-              new_nonbasic_list.push_back(h);
-            } else {
-              new_basic_list.push_back(h);
-            }
-            h++;
-          }
-        }
-        original_lp_.A.remove_columns(marked_cols);
-        original_lp_.A.transpose(Arow);
-        original_lp_.objective = new_objective;
-        original_lp_.lower = new_lower;
-        original_lp_.upper = new_upper;
-        original_lp_.rhs = new_rhs;
-        var_types_ = new_var_types;
-        original_lp_.num_cols = original_lp_.A.n;
-        original_lp_.num_rows = original_lp_.A.m;
-        basic_list = new_basic_list;
-        nonbasic_list = new_nonbasic_list;
-        root_vstatus_ = new_vstatus;
-        root_relax_soln_.x = new_solution_x;
-        root_relax_soln_.y = new_solution_y;
-        root_relax_soln_.z = new_solution_z;
-
-#ifdef PRINT_SIZES
-        settings_.log.printf("A %d x %d\n", original_lp_.A.m, original_lp_.A.n);
-        settings_.log.printf("basic_list size %d\n", basic_list.size());
-        settings_.log.printf("nonbasic_list size %d\n", nonbasic_list.size());
-        settings_.log.printf("root_vstatus_ size %d\n", root_vstatus_.size());
-        settings_.log.printf("original_lp_.num_rows %d\n", original_lp_.num_rows);
-        settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols);
-        settings_.log.printf("root_relax_soln_.x size %d\n", root_relax_soln_.x.size());
-        settings_.log.printf("root_relax_soln_.y size %d\n", root_relax_soln_.y.size());
-        settings_.log.printf("root_relax_soln_.z size %d\n", root_relax_soln_.z.size());
-        settings_.log.printf("rhs size %ld\n", original_lp_.rhs.size());
-        settings_.log.printf("lower size %ld\n", original_lp_.lower.size());
-        settings_.log.printf("upper size %ld\n", original_lp_.upper.size());
-        settings_.log.printf("objective size %ld\n", original_lp_.objective.size());
-        settings_.log.printf("var_types_ size %ld\n", var_types_.size());
-#endif
-        settings_.log.printf("After removal %d rows %d columns %d nonzeros\n",
-                            original_lp_.num_rows,
-                            original_lp_.num_cols,
-                            original_lp_.A.col_start[original_lp_.A.n]);
+      // Adjust the solution
+      root_relax_soln_.x.resize(original_lp_.num_cols, 0.0);
+      root_relax_soln_.y.resize(original_lp_.num_rows, 0.0);
+      root_relax_soln_.z.resize(original_lp_.num_cols, 0.0);
+
+      // For now just clear the edge norms
+      edge_norms_.clear();
+      i_t iter              = 0;
+      bool initialize_basis = false;
+      dual::status_t cut_status = dual_phase2_with_advanced_basis(2,
+                                                                  0,
+                                                                  initialize_basis,
+                                                                  stats_.start_time,
+                                                                  original_lp_,
+                                                                  lp_settings,
+                                                                  root_vstatus_,
+                                                                  basis_update,
+                                                                  basic_list,
+                                                                  nonbasic_list,
+                                                                  root_relax_soln_,
+                                                                  iter,
+                                                                  edge_norms_);
+
+      settings_.log.printf("Cut LP iterations %d. A nz %d\n",
+                           iter,
+                           original_lp_.A.col_start[original_lp_.A.n]);
+      stats_.total_lp_iters += root_relax_soln_.iterations;
+      root_objective_ = compute_objective(original_lp_, root_relax_soln_.x);
 
-        basis_update.resize(original_lp_.num_rows);
-        basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_);
+      if (cut_status != dual::status_t::OPTIMAL) {
+        settings_.log.printf("Cut status %d\n", cut_status);
+        exit(1);
       }
 
+      local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_);
+
+      remove_cuts(original_lp_,
+                  settings_,
+                  Arow,
+                  original_rows,
+                  var_types_,
+                  root_vstatus_,
+                  root_relax_soln_.x,
+                  root_relax_soln_.y,
+                  root_relax_soln_.z,
+                  basic_list,
+                  nonbasic_list,
+                  basis_update);
+
       fractional.clear();
       num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
 
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 5b304addd..ccbad335a 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -149,6 +149,9 @@ class branch_and_bound_t {
   // Local lower bounds for each thread
   std::vector<omp_atomic_t<f_t>> local_lower_bounds_;
 
+  // Mutex for the original LP
+  omp_mutex_t mutex_original_lp_;
+
   // Mutex for upper bound
   omp_mutex_t mutex_upper_;
 
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
new file mode 100644
index 000000000..4b8fc94dd
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -0,0 +1,1050 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+
+#include <dual_simplex/cuts.hpp>
+
+
+namespace cuopt::linear_programming::dual_simplex {
+
+// Appends `cut` with right-hand side `rhs` to the pool; a cut that uses a column >= original_vars_ is rejected.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::add_cut(i_t n, const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
+{
+  // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool
+  // Validate before mutating anything, so that a rejected cut leaves the pool state untouched.
+  for (i_t p = 0; p < cut.i.size(); p++) {
+    const i_t j = cut.i[p];
+    if (j >= original_vars_) {
+      settings_.log.printf(
+        "Cut has variable %d that is greater than original_vars_ %d\n", j, original_vars_);
+      return;
+    }
+  }
+
+  if (n > cut_storage_.n) {  // widen the storage when the caller reports more columns
+    cut_storage_.n = n;
+  }
+
+  cut_storage_.append_row(cut);
+  rhs_storage_.push_back(rhs);
+  cut_age_.push_back(0);  // a new cut starts with age 0
+}
+
+// Returns (rhs - cut'*x) / ||cut||_2 for pool row `row`; also outputs the raw violation and the row norm.
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_distance(i_t row, const std::vector<f_t>& x, f_t& cut_violation, f_t &cut_norm)
+{
+  const i_t row_start = cut_storage_.row_start[row];
+  const i_t row_end = cut_storage_.row_start[row + 1];
+  f_t cut_x = 0.0;  // cut'*x
+  f_t dot = 0.0;    // cut'*cut
+  for (i_t p = row_start; p < row_end; p++) {
+    const i_t j = cut_storage_.j[p];
+    const f_t cut_coeff = cut_storage_.x[p];
+    cut_x += cut_coeff * x[j];
+    dot += cut_coeff * cut_coeff;
+  }
+  cut_violation = rhs_storage_[row] - cut_x;
+  cut_norm = std::sqrt(dot);
+  // An all-zero row has norm 0: report distance 0 instead of inf/NaN (a NaN score breaks sorting).
+  return (cut_norm > 0.0) ? cut_violation / cut_norm : f_t{0};
+}
+// Returns the number of stored entries in pool row `row` divided by original_vars_.
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_density(i_t row)
+{
+  const i_t row_start = cut_storage_.row_start[row];
+  const i_t row_end = cut_storage_.row_start[row + 1];
+  const i_t cut_nz = row_end - row_start;  // entries stored for this row
+  const i_t original_vars = original_vars_;
+  return static_cast<f_t>(cut_nz) / original_vars;  // cast first so the division is floating point
+}
+// Orthogonality measure for pool rows i and j: 1 - |dot| / (cut_norms_[i] * cut_norms_[j]).
+template <typename i_t, typename f_t>
+f_t cut_pool_t<i_t, f_t>::cut_orthogonality(i_t i,  i_t j)
+{
+  const i_t i_start = cut_storage_.row_start[i];
+  const i_t i_end = cut_storage_.row_start[i + 1];
+  const i_t i_nz = i_end - i_start;
+  const i_t j_start = cut_storage_.row_start[j];
+  const i_t j_end = cut_storage_.row_start[j + 1];
+  const i_t j_nz = j_end - j_start;
+  // sparse_dot gets the index and value slices of both rows; presumably it returns their inner product (confirm).
+  f_t dot = sparse_dot(cut_storage_.j.data() + i_start, cut_storage_.x.data() + i_start, i_nz,
+                       cut_storage_.j.data() + j_start, cut_storage_.x.data() + j_start, j_nz);
+  // The norms are read from cut_norms_ (filled in by score_cuts); they are not recomputed here.
+  f_t norm_i = cut_norms_[i];
+  f_t norm_j = cut_norms_[j];
+  return 1.0 - std::abs(dot) / (norm_i * norm_j);  // NOTE(review): no guard against a zero norm
+}
+// Scores every cut in the pool at x_relax and greedily appends the indices of the chosen rows to best_cuts_.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
+{
+  const f_t weight_distance = 1.0;       // score = weight_distance * distance
+  const f_t weight_orthogonality = 1.0;  //       + weight_orthogonality * orthogonality
+  cut_distances_.resize(cut_storage_.m, 0.0);
+  cut_norms_.resize(cut_storage_.m, 0.0);
+  cut_orthogonality_.resize(cut_storage_.m, 1);  // NOTE(review): resize only initializes new entries; older values persist
+  cut_scores_.resize(cut_storage_.m, 0.0);
+  for (i_t i = 0; i < cut_storage_.m; i++) {
+    f_t violation;
+    cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]);
+    cut_scores_[i] = weight_distance * cut_distances_[i]  + weight_orthogonality * cut_orthogonality_[i];
+    //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
+  }
+  // Order all pool rows by descending score.
+  std::vector<i_t> sorted_indices(cut_storage_.m);
+  std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
+  std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) {
+    return cut_scores_[a] > cut_scores_[b];
+  });
+
+  std::vector<i_t> indices;  // scratch list of the candidates that survive one selection round
+  indices.reserve(sorted_indices.size());
+
+  // Selection limits: cap on selected cuts, minimum orthogonality to the selected cuts, minimum distance.
+  const i_t max_cuts = 2000;
+  const f_t min_orthogonality = 0.5;
+  const f_t min_cut_distance = 1e-4;
+  best_cuts_.reserve(std::min(max_cuts, cut_storage_.m));
+  // NOTE(review): scored_cuts_ and best_cuts_ are not reset in this function; presumably done elsewhere (confirm).
+  while (scored_cuts_ < max_cuts && !sorted_indices.empty()) {
+    const i_t i = sorted_indices[0];
+    // sorted_indices is in descending score order, so the front is the best remaining candidate.
+    if (cut_distances_[i] <= min_cut_distance) {
+        break;
+    }
+
+    if (cut_age_[i] > 0) {
+        settings_.log.printf("Adding cut with age %d\n", cut_age_[i]);
+    }
+    //settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]);
+
+    best_cuts_.push_back(i);
+    scored_cuts_++;
+
+    // Update each remaining cut's orthogonality against the cut just selected; drop those below min_orthogonality
+    for (i_t k = 1; k < sorted_indices.size(); k++) {
+      const i_t j = sorted_indices[k];
+      cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j));
+      if (cut_orthogonality_[j] >= min_orthogonality) {
+        indices.push_back(j);
+        cut_scores_[j] = weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j];
+        //settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]);
+      }
+    }
+
+    sorted_indices = indices;
+    indices.clear();
+    //settings_.log.printf("Sorting %d cuts\n", sorted_indices.size());
+    // The scores of the survivors changed, so sort them again.
+    std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) {
+        return cut_scores_[a] > cut_scores_[b];
+    });
+  }
+}
+// Copies the rows listed in best_cuts_ into best_cuts/best_rhs (both reset first, both negated); returns their count.
+template <typename i_t, typename f_t>
+i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs)
+{
+  best_cuts.m = 0;
+  best_cuts.n = cut_storage_.n;
+  best_cuts.row_start.clear();
+  best_cuts.j.clear();
+  best_cuts.x.clear();
+  best_cuts.row_start.reserve(scored_cuts_ + 1);
+  best_cuts.row_start.push_back(0);
+  best_rhs.clear();  // keep best_rhs aligned with the rows of best_cuts even when the caller reuses the vector
+
+  for (i_t i: best_cuts_) {
+    sparse_vector_t<i_t, f_t> cut(cut_storage_, i);
+    cut.negate();  // the pool holds cut'*x >= rhs; the caller receives -cut'*x <= -rhs
+    best_cuts.append_row(cut);
+    best_rhs.push_back(-rhs_storage_[i]);
+  }
+
+  return static_cast<i_t>(best_cuts_.size());
+}
+
+// Increments the age counter of every cut currently tracked in cut_age_.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::age_cuts()
+{
+  for (auto& age : cut_age_) {
+    ++age;
+  }
+}
+// Placeholder for removing cuts from the pool; the body is empty, so calling it currently does nothing.
+template <typename i_t, typename f_t>
+void cut_pool_t<i_t, f_t>::drop_cuts()
+{
+  // TODO: Implement this
+}
+// Entry point for cut separation at the LP solution xstar; currently it only runs Gomory separation.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                                               csc_matrix_t<i_t, f_t>& Arow,
+                                               const std::vector<variable_type_t>& var_types,
+                                               basis_update_mpf_t<i_t, f_t>& basis_update,
+                                               const std::vector<f_t>& xstar,
+                                               const std::vector<i_t>& basic_list,
+                                               const std::vector<i_t>& nonbasic_list)
+{
+  // Generate Gomory Cuts
+  generate_gomory_cuts(
+    lp, settings, Arow, var_types, basis_update, xstar, basic_list, nonbasic_list);
+
+  // MIR separation on the constraint rows exists (generate_mir_cuts) but the call is disabled:
+  // Generate MIR cuts
+  // generate_mir_cuts(lp, settings, Arow, var_types, xstar);
+}
+// Tries one MIR cut per constraint row that contains a slack-like column; cuts violated by xstar go into cut_pool_.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
+                                                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                                                   csc_matrix_t<i_t, f_t>& Arow,
+                                                   const std::vector<variable_type_t>& var_types,
+                                                   const std::vector<f_t>& xstar)
+{
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
+  mir.initialize(lp, xstar);
+
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    sparse_vector_t<i_t, f_t> inequality(Arow, i);  // row i of A (Arow stores rows as columns)
+    f_t inequality_rhs = lp.rhs[i];
+
+    const i_t row_start = Arow.col_start[i];
+    const i_t row_end = Arow.col_start[i + 1];
+    i_t last_slack = -1;  // last column in the row that looks like a slack, or -1
+    for (i_t p = row_start; p < row_end; p++) {
+      const i_t j = Arow.i[p];
+      const f_t a = Arow.x[p];
+      if (var_types[j] == variable_type_t::CONTINUOUS && a == 1.0 && lp.lower[j] == 0.0) {
+        last_slack = j;
+      }
+    }
+
+    if (last_slack != -1) {
+        // Remove the slack from the equality to get an inequality (the entry stays, with value 0)
+        for (i_t k = 0; k < inequality.i.size(); k++) {
+          const i_t j = inequality.i[k];
+          if (j == last_slack) {
+            inequality.x[k] = 0.0;
+          }
+        }
+
+        // inequality'*x <= inequality_rhs
+        // But for MIR we need: inequality'*x >= inequality_rhs
+        inequality_rhs *= -1;
+        inequality.negate();
+
+        sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+        f_t cut_rhs = 0.0;
+        i_t mir_status = mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
+        // A nonzero status means no cut was produced, so cut and cut_rhs are only used on success.
+        if (mir_status == 0) {
+          // Keep the cut only when xstar violates cut'*x >= cut_rhs.
+          f_t dot = 0.0;
+          for (i_t k = 0; k < cut.i.size(); k++) {
+            const i_t jj = cut.i[k];
+            const f_t aj = cut.x[k];
+            dot += aj * xstar[jj];
+          }
+          if (dot >= cut_rhs) {
+            continue;
+          }
+
+          settings.log.printf("Adding MIR cut %d\n", i);
+          cut_pool_.add_cut(lp.num_cols, cut, cut_rhs);
+        }
+    }
+  }
+}
+// Gomory separation: for each fractional basic integer variable, applies MIR to its base inequality and to the negation, and pools the deeper violated cut.
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list)
+{
+  mixed_integer_gomory_base_inequality_t<i_t, f_t> gomory(lp, basis_update, nonbasic_list);
+  mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
+
+  mir.initialize(lp, xstar);
+
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    sparse_vector_t<i_t, f_t> inequality(lp.num_cols, 0);
+    f_t inequality_rhs;
+    const i_t j = basic_list[i];
+    if (var_types[j] != variable_type_t::INTEGER) { continue; }
+    const f_t x_j = xstar[j];
+    if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; }
+    i_t gomory_status = gomory.generate_base_inequality(lp,
+                                                        settings,
+                                                        Arow,
+                                                        var_types,
+                                                        basis_update,
+                                                        xstar,
+                                                        basic_list,
+                                                        nonbasic_list,
+                                                        i,
+                                                        inequality,
+                                                        inequality_rhs);
+    if (gomory_status == 0) {
+      // Given the base inequality, generate a MIR cut
+      sparse_vector_t<i_t, f_t> cut_A(lp.num_cols, 0);
+      f_t cut_A_rhs;
+      i_t mir_status =
+        mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_A, cut_A_rhs);
+      bool A_valid = false;
+      f_t cut_A_distance = 0.0;
+      if (mir_status == 0) {
+        // Check that the cut is violated
+        f_t dot = 0.0;
+        f_t cut_norm = 0.0;
+        for (i_t k = 0; k < cut_A.i.size(); k++) {
+          const i_t jj = cut_A.i[k];
+          const f_t aj = cut_A.x[k];
+          dot += aj * xstar[jj];
+          cut_norm += aj * aj;
+        }
+        if (dot >= cut_A_rhs) {
+          settings.log.printf("Cut %d is not violated. Skipping\n", i);
+        } else {
+          cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
+          A_valid = true;
+        }
+        // No `continue` on a non-violated cut A: the negated inequality below may still give a violated cut.
+      }
+
+      // Negate the base inequality
+      inequality.negate();
+      inequality_rhs *= -1;
+
+      sparse_vector_t<i_t, f_t> cut_B(lp.num_cols, 0);
+      f_t cut_B_rhs;
+
+      mir_status =
+        mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_B, cut_B_rhs);
+      bool B_valid = false;
+      f_t cut_B_distance = 0.0;
+      if (mir_status == 0) {
+        // Check that the cut is violated
+        f_t dot = 0.0;
+        f_t cut_norm = 0.0;
+        for (i_t k = 0; k < cut_B.i.size(); k++) {
+          const i_t jj = cut_B.i[k];
+          const f_t aj = cut_B.x[k];
+          dot += aj * xstar[jj];
+          cut_norm += aj * aj;
+        }
+        if (dot >= cut_B_rhs) {
+          settings.log.printf("Cut %d is not violated. Skipping\n", i);
+        } else {
+          cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);
+          B_valid = true;
+        }
+        // No `continue` on a non-violated cut B either: a violated cut A must still reach the selection below.
+      }
+      // Pool at most one cut per row: the violated cut with the larger distance (cut B on a tie).
+      if ((cut_A_distance > cut_B_distance) && A_valid) {
+        cut_pool_.add_cut(lp.num_cols, cut_A, cut_A_rhs);
+      } else if (B_valid) {
+        cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs);
+      }
+    }
+  }
+}
+// Builds the base inequality x_j + a_bar^T x_N >= b_bar_i for basis row i (j = basic_list[i]); returns 0 on success, -1 when the row is skipped.
+template <typename i_t, typename f_t>
+i_t mixed_integer_gomory_base_inequality_t<i_t, f_t>::generate_base_inequality(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<variable_type_t>& var_types,
+  basis_update_mpf_t<i_t, f_t>& basis_update,
+  const std::vector<f_t>& xstar,
+  const std::vector<i_t>& basic_list,
+  const std::vector<i_t>& nonbasic_list,
+  i_t i,
+  sparse_vector_t<i_t, f_t>& inequality,
+  f_t& inequality_rhs)
+{
+  // Only fractional basic integer variables give Gomory cuts
+    const i_t j = basic_list[i];
+    if (var_types[j] != variable_type_t::INTEGER) { return -1; }
+    const f_t x_j = xstar[j];
+    if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { return -1; }
+    // Skip cuts that are shallow. These tests only need x_j and b_bar_[i], so they run before the B^T solve.
+    const f_t shallow_tol = 1e-2;
+    if (std::abs(x_j - std::round(x_j)) < shallow_tol) {
+      //settings_.log.printf("Skipping shallow cut %d. b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j);
+      return -1;
+    }
+
+    const f_t f_val = b_bar_[i] - std::floor(b_bar_[i]);
+    if (f_val < 0.01 || f_val > 0.99) {
+      //settings_.log.printf("Skipping cut %d. b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val);
+      return -1;
+    }
+
+#ifdef PRINT_CUT_INFO
+    settings.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i);
+#endif
+#ifdef PRINT_BASIS
+    for (i_t h = 0; h < basic_list.size(); h++) {
+      settings.log.printf("basic_list[%d] = %d\n", h, basic_list[h]);
+    }
+#endif
+
+    // Solve B^T u_bar = e_i
+    sparse_vector_t<i_t, f_t> e_i(lp.num_rows, 1);
+    e_i.i[0] = i;
+    e_i.x[0] = 1.0;
+    sparse_vector_t<i_t, f_t> u_bar(lp.num_rows, 0);
+    basis_update.b_transpose_solve(e_i, u_bar);
+
+    // u_bar now holds row i of B^{-1}
+#ifdef CHECK_B_TRANSPOSE_SOLVE
+    std::vector<f_t> u_bar_dense(lp.num_rows);
+    u_bar.to_dense(u_bar_dense);
+
+    std::vector<f_t> BTu_bar(lp.num_rows);
+    b_transpose_multiply(lp, basic_list, u_bar_dense, BTu_bar);
+    for (i_t k = 0; k < lp.num_rows; k++) {
+      if (k == i) {
+        if (std::abs(BTu_bar[k] - 1.0) > 1e-6) {
+          settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+          exit(1);
+        }
+      } else {
+        if (std::abs(BTu_bar[k]) > 1e-6) {
+          settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i);
+          exit(1);
+        }
+      }
+    }
+#endif
+
+    // Compute a_bar = N^T u_bar
+    // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused.
+    const i_t nz_ubar = u_bar.i.size();
+    std::vector<i_t> abar_indices;
+    abar_indices.reserve(nz_ubar);
+    for (i_t k = 0; k < nz_ubar; k++) {
+      const i_t ii        = u_bar.i[k];
+      const f_t u_bar_i   = u_bar.x[k];
+      const i_t row_start = Arow.col_start[ii];
+      const i_t row_end   = Arow.col_start[ii + 1];
+      for (i_t p = row_start; p < row_end; p++) {
+        const i_t jj = Arow.i[p];
+        if (nonbasic_mark_[jj] == 1) {
+          x_workspace_[jj] += u_bar_i * Arow.x[p];
+          if (!x_mark_[jj]) {
+            x_mark_[jj] = 1;
+            abar_indices.push_back(jj);
+          }
+        }
+      }
+    }
+
+    sparse_vector_t<i_t, f_t> a_bar(lp.num_cols, abar_indices.size() + 1);  // +1 slot for x_j itself
+    for (i_t k = 0; k < abar_indices.size(); k++) {
+      const i_t jj = abar_indices[k];
+      a_bar.i[k]   = jj;
+      a_bar.x[k]   = x_workspace_[jj];
+    }
+
+    // Clear the workspace
+    for (i_t jj : abar_indices) {
+      x_workspace_[jj] = 0.0;
+      x_mark_[jj]      = 0;
+    }
+    abar_indices.clear();
+
+    // We should now have the base inequality
+    // x_j + a_bar^T x_N >= b_bar_i
+    // We add x_j into a_bar so that everything is in a single sparse_vector_t
+    a_bar.i[a_bar.i.size() - 1] = j;
+    a_bar.x[a_bar.x.size() - 1] = 1.0;
+
+#ifdef CHECK_A_BAR_DENSE_DOT
+    std::vector<f_t> a_bar_dense(lp.num_cols);
+    a_bar.to_dense(a_bar_dense);
+
+    f_t a_bar_dense_dot = dot<i_t, f_t>(a_bar_dense, xstar);
+    if (std::abs(a_bar_dense_dot - b_bar_[i]) > 1e-6) {
+      settings.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar_[i]);
+      settings.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar_[i]);
+      exit(1);
+    }
+#endif
+
+    // We have that x_j + a_bar^T x_N == b_bar_i
+    // So x_j + a_bar^T x_N >= b_bar_i
+    // And x_j + a_bar^T x_N <= b_bar_i
+    // Or -x_j - a_bar^T x_N >= -b_bar_i
+
+#ifdef PRINT_CUT
+    {
+      settings.log.printf("Cut %d\n", i);
+      for (i_t k = 0; k < a_bar.i.size(); k++) {
+        const i_t jj = a_bar.i[k];
+        const f_t aj = a_bar.x[k];
+        settings.log.printf("(%d, %e) ", jj, aj);
+      }
+      settings.log.printf("\nEnd cut %d b_bar[%d] = %e\n", i, i, b_bar_[i]);
+    }
+#endif
+
+#ifdef PRINT_BASE_INEQUALITY
+    // Print out the base inequality
+    for (i_t k = 0; k < a_bar.i.size(); k++) {
+      const i_t jj = a_bar.i[k];
+      const f_t aj = a_bar.x[k];
+      settings.log.printf("a_bar[%d] = %e\n", k, aj);
+    }
+    settings.log.printf("b_bar[%d] = %e\n", i, b_bar_[i]);
+#endif
+
+    inequality = a_bar;
+    inequality_rhs = b_bar_[i];
+
+    return 0;
+}
+// Sizes the workspaces for lp and records, per variable, which bound (lower or upper) generate_cut complements against.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t, f_t>& lp,
+                                              const std::vector<f_t>& xstar)
+{
+  // The scratch arrays are resized only when the number of columns changed.
+  if (lp.num_cols != num_vars_) {
+    num_vars_ = lp.num_cols;
+    x_workspace_.resize(num_vars_, 0.0);
+    x_mark_.resize(num_vars_, 0);
+  }
+  // The bound flags depend on xstar, so rebuild them on every call; stale flags could mark both bounds of a variable.
+  has_lower_.assign(num_vars_, 0);
+  has_upper_.assign(num_vars_, 0);
+
+  needs_complement_ = false;
+  for (i_t j = 0; j < lp.num_cols; j++) {
+    if (lp.lower[j] < 0) {
+      settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]);
+      exit(1);
+    }
+    const f_t uj = lp.upper[j];
+    const f_t lj = lp.lower[j];
+    if (uj != inf || lj != 0.0) { needs_complement_ = true; }  // any bound other than [0, inf) needs complementing
+    const f_t xstar_j = xstar[j];
+    if (uj < inf) {
+      if (uj - xstar_j <= xstar_j - lj) {  // pick the bound closer to xstar_j (upper on a tie)
+        has_upper_[j] = 1;
+      } else {
+        has_lower_[j] = 1;
+      }
+      continue;
+    }
+
+    if (lj > -inf) { has_lower_[j] = 1; }
+  }
+}
+// Builds the MIR cut for the inequality a'*x >= beta; the result cut'*x >= cut_rhs is written to cut/cut_rhs. Always returns 0.
+template <typename i_t, typename f_t>
+i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
+  const sparse_vector_t<i_t, f_t>& a,
+  f_t beta,
+  const std::vector<f_t>& upper_bounds,
+  const std::vector<f_t>& lower_bounds,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  auto f = [](f_t q_1, f_t q_2) -> f_t {
+    f_t q_1_hat = q_1 - std::floor(q_1);
+    f_t q_2_hat = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+  };
+  // h(q) = max(q, 0); f_t{0} keeps both std::max arguments the same type for any f_t (0.0 fails to deduce for float).
+  auto h = [](f_t q) -> f_t { return std::max(q, f_t{0}); };
+
+  std::vector<i_t> cut_indices;  // columns with a nonzero accumulated coefficient in x_workspace_
+  cut_indices.reserve(a.i.size());
+  f_t R;
+  if (!needs_complement_) {
+    R = (beta - std::floor(beta)) * std::ceil(beta);
+
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      f_t aj       = a.x[k];
+      if (var_types[jj] == variable_type_t::INTEGER) {
+        x_workspace_[jj] += f(aj, beta);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        x_workspace_[jj] += h(aj);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      }
+    }
+  } else {
+    // Compute r
+    f_t r = beta;
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        r -= uj * a.x[k];
+        continue;
+      }
+      if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        r -= lj * a.x[k];
+      }
+    }
+
+    // Compute R
+    R = std::ceil(r) * (r - std::floor(r));
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        const f_t uj = upper_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R -= f(-aj, r) * uj;
+        } else {
+          R -= h(-aj) * uj;
+        }
+      } else if (has_lower_[jj]) {
+        const f_t lj = lower_bounds[jj];
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          R += f(aj, r) * lj;
+        } else {
+          R += h(aj) * lj;
+        }
+      }
+    }
+
+    // Compute the cut coefficients
+    for (i_t k = 0; k < a.i.size(); k++) {
+      const i_t jj = a.i[k];
+      const f_t aj = a.x[k];
+      if (has_upper_[jj]) {
+        if (var_types[jj] == variable_type_t::INTEGER) {
+          // Upper intersect I
+          x_workspace_[jj] -= f(-aj, r);
+          if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        } else {
+          // Upper intersect C
+          f_t h_j = h(-aj);
+          if (h_j != 0.0) {
+            x_workspace_[jj] -= h_j;
+            if (!x_mark_[jj]) {
+              x_mark_[jj] = 1;
+              cut_indices.push_back(jj);
+            }
+          }
+        }
+      } else if (var_types[jj] == variable_type_t::INTEGER) {
+        // I \ Upper
+        x_workspace_[jj] += f(aj, r);
+        if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+          x_mark_[jj] = 1;
+          cut_indices.push_back(jj);
+        }
+      } else {
+        // C \ Upper
+        f_t h_j = h(aj);
+        if (h_j != 0.0) {
+          x_workspace_[jj] += h_j;
+          if (!x_mark_[jj]) {
+            x_mark_[jj] = 1;
+            cut_indices.push_back(jj);
+          }
+        }
+      }
+    }
+  }
+
+  cut.i.reserve(cut_indices.size());
+  cut.x.reserve(cut_indices.size());
+  for (i_t k = 0; k < cut_indices.size(); k++) {
+    const i_t jj = cut_indices[k];
+
+    // Check for small coefficients
+    const f_t aj = x_workspace_[jj];
+    if (std::abs(aj) < 1e-6) {
+      if (aj >= 0.0 && upper_bounds[jj] < inf) {
+        // Move this to the right-hand side
+        R -= aj * upper_bounds[jj];
+        continue;
+      } else if (aj <= 0.0 && lower_bounds[jj] > -inf) {
+        R += aj * lower_bounds[jj];  // NOTE(review): R -= aj * lj looks like the tight bound; this one is weaker (confirm)
+        continue;
+      }
+      // No finite bound can absorb the term, so the tiny coefficient stays in the cut.
+    }
+    cut.i.push_back(jj);
+    cut.x.push_back(x_workspace_[jj]);
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+
+  // The new cut is: g'*x >= R
+  // It is returned in this >= form; cut_pool_t::get_best_cuts negates pooled cuts into the h'*x <= b form.
+  cut.sort();
+
+  cut_rhs = R;
+  return 0;
+}
+
+// Adds the cuts C*x <= d (expected to be violated by the current solution) to
+// lp as rows C*x + s = d with new slack columns s >= 0, makes those slacks basic
+// and passes C(:, basic_list) to basis_update.append_cuts. Does not re-solve.
+// Returns 0 on success; returns -1 with lp unmodified if validation fails.
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms)
+{
+  // solution, nonbasic_list and edge_norms are not accessed by this function.
+#ifdef CHECK_BASIS
+  {
+    csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+    basis_update.multiply_lu(Btest);
+    csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+    form_b(lp.A, basic_list, B);
+    csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+    add(Btest, B, 1.0, -1.0, Diff);
+    const f_t err = Diff.norm1();
+    settings.log.printf("Before || B - L*U || %e\n", err);
+    if (err > 1e-6) { exit(1); }
+  }
+#endif
+
+  const i_t p = cuts.m;
+  if (cut_rhs.size() != static_cast<size_t>(p)) {
+    settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
+    return -1;
+  }
+  settings.log.debug("Number of cuts %d\n", p);
+  settings.log.debug("Original lp rows %d\n", lp.num_rows);
+  settings.log.debug("Original lp cols %d\n", lp.num_cols);
+
+  // Validate the inputs and build C_B = C(:, basic_list) before lp is modified.
+  // Cut columns must be < old_cols: in_basis below only covers the old columns.
+  const i_t old_rows = lp.num_rows;
+  const i_t old_cols = lp.num_cols;
+  std::vector<i_t> C_col_degree(old_cols, 0);
+  const i_t cuts_nz = cuts.row_start[p];
+  for (i_t q = 0; q < cuts_nz; q++) {
+    const i_t j = cuts.j[q];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf("cut column index %d is out of bounds [0, %d)\n", j, old_cols);
+      return -1;
+    }
+    C_col_degree[j]++;
+  }
+  settings.log.debug("Done computing C_col_degree\n");
+
+  std::vector<i_t> in_basis(old_cols, -1);
+  const i_t num_basic = static_cast<i_t>(basic_list.size());
+  i_t C_B_nz          = 0;
+  for (i_t k = 0; k < num_basic; k++) {
+    const i_t j = basic_list[k];
+    if (j < 0 || j >= old_cols) {
+      settings.log.printf(
+        "basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols);
+      return -1;
+    }
+    in_basis[j] = k;
+    C_B_nz += C_col_degree[j];
+  }
+  settings.log.debug("Done estimating C_B_nz\n");
+
+  csr_matrix_t<i_t, f_t> C_B(p, num_basic, C_B_nz);
+  i_t C_B_count = 0;
+  for (i_t i = 0; i < p; i++) {
+    C_B.row_start[i]    = C_B_count;
+    const i_t row_start = cuts.row_start[i];
+    const i_t row_end   = cuts.row_start[i + 1];
+    for (i_t q = row_start; q < row_end; q++) {
+      const i_t j       = cuts.j[q];
+      const i_t j_basis = in_basis[j];
+      if (j_basis == -1) { continue; }
+      C_B.j[C_B_count] = j_basis;
+      C_B.x[C_B_count] = cuts.x[q];
+      C_B_count++;
+    }
+  }
+  C_B.row_start[p] = C_B_count;
+
+  if (C_B_count != C_B_nz) {
+    settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, C_B_count);
+    return -1;
+  }
+  settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, C_B_count);
+
+  csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
+  lp.A.to_compressed_row(new_A_row);
+  new_A_row.append_rows(cuts);
+
+  csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
+  new_A_row.to_compressed_col(new_A_col);
+
+  // Add in slack variables for the new rows
+  lp.lower.resize(lp.num_cols + p);
+  lp.upper.resize(lp.num_cols + p);
+  lp.objective.resize(lp.num_cols + p);
+  i_t nz = new_A_col.col_start[lp.num_cols];
+  new_A_col.col_start.resize(lp.num_cols + p + 1);
+  new_A_col.i.resize(nz + p);
+  new_A_col.x.resize(nz + p);
+  i_t k = old_rows;
+  for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) {
+    new_A_col.col_start[j] = nz;
+    new_A_col.i[nz]        = k++;
+    new_A_col.x[nz]        = 1.0;
+    nz++;
+    lp.lower[j]     = 0.0;
+    lp.upper[j]     = inf;
+    lp.objective[j] = 0.0;
+  }
+  settings.log.debug("Done adding slacks\n");
+  new_A_col.col_start[lp.num_cols + p] = nz;
+  new_A_col.n                          = lp.num_cols + p;
+
+  lp.A = new_A_col;
+  lp.num_rows += p;
+  lp.num_cols += p;
+
+  lp.rhs.resize(lp.num_rows);
+  for (i_t k = old_rows; k < old_rows + p; k++) {
+    const i_t h = k - old_rows;
+    lp.rhs[k]   = cut_rhs[h];
+  }
+  settings.log.debug("Done adding rhs\n");
+
+  // Adjust the basis update to include the new cuts
+  basis_update.append_cuts(C_B);
+
+  basic_list.resize(lp.num_rows, 0);
+  i_t h = old_cols;
+  for (i_t j = old_rows; j < lp.num_rows; j++) {
+    basic_list[j] = h++;
+  }
+
+#ifdef CHECK_BASIS
+  // Check the basis update
+  csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
+  basis_update.multiply_lu(Btest);
+
+  csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
+  form_b(lp.A, basic_list, B);
+
+  csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
+  add(Btest, B, 1.0, -1.0, Diff);
+  const f_t err = Diff.norm1();
+  settings.log.printf("After || B - L*U || %e\n", err);
+  if (err > 1e-6) {
+    settings.log.printf("Diff matrix\n");
+    // Diff.print_matrix();
+    exit(1);
+  }
+#endif
+  // Adjust the vstatus
+  vstatus.resize(lp.num_cols);
+  for (i_t j = old_cols; j < lp.num_cols; j++) {
+    vstatus[j] = variable_status_t::BASIC;
+  }
+
+  return 0;
+}
+
+// Removes cut rows k >= original_rows whose dual y[k] is below 1e-10 in magnitude and
+// whose slack column is basic with x > 1e-3, together with that slack column. Compacts
+// lp, Arow, var_types, vstatus, x, y, z and the basis lists, then refactors the basis.
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csc_matrix_t<i_t, f_t>& Arow,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update)
+{
+  std::vector<i_t> cuts_to_remove;
+  cuts_to_remove.reserve(lp.num_rows - original_rows);
+  std::vector<i_t> slacks_to_remove;
+  slacks_to_remove.reserve(lp.num_rows - original_rows);
+  const f_t dual_tol = 1e-10;
+  for (i_t k = original_rows; k < lp.num_rows; k++) {
+    if (std::abs(y[k]) < dual_tol) {
+      const i_t row_start = Arow.col_start[k];
+      const i_t row_end   = Arow.col_start[k + 1];
+      i_t last_slack      = -1;
+      const f_t slack_tol = 1e-3;
+      for (i_t p = row_start; p < row_end; p++) {
+        const i_t jj      = Arow.i[p];
+        const i_t col_len = lp.A.col_start[jj + 1] - lp.A.col_start[jj];
+        if (col_len == 1 && var_types[jj] == variable_type_t::CONTINUOUS && Arow.x[p] == 1.0 &&
+            lp.lower[jj] == 0.0) {
+          if (vstatus[jj] == variable_status_t::BASIC && x[jj] > slack_tol) { last_slack = jj; }
+        }
+      }
+      if (last_slack != -1) {
+        cuts_to_remove.push_back(k);
+        slacks_to_remove.push_back(last_slack);
+      }
+    }
+  }
+
+  if (!cuts_to_remove.empty()) {
+    settings.log.printf("Removing %d cuts\n", static_cast<i_t>(cuts_to_remove.size()));
+    std::vector<i_t> marked_rows(lp.num_rows, 0);
+    for (i_t i : cuts_to_remove) { marked_rows[i] = 1; }
+    std::vector<i_t> marked_cols(lp.num_cols, 0);
+    for (i_t j : slacks_to_remove) { marked_cols[j] = 1; }
+
+    std::vector<f_t> new_rhs(lp.num_rows - cuts_to_remove.size());
+    std::vector<f_t> new_solution_y(lp.num_rows - cuts_to_remove.size());
+    i_t h = 0;
+    for (i_t i = 0; i < lp.num_rows; i++) {
+      if (!marked_rows[i]) {
+        new_rhs[h]        = lp.rhs[i];
+        new_solution_y[h] = y[i];
+        h++;
+      }
+    }
+
+    // Arow is used as the transpose of lp.A: drop the rows there, then rebuild lp.A from it
+    Arow.remove_columns(marked_rows);
+    Arow.transpose(lp.A);
+
+    std::vector<f_t> new_objective(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_lower(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_upper(lp.num_cols - slacks_to_remove.size());
+    std::vector<variable_type_t> new_var_types(lp.num_cols - slacks_to_remove.size());
+    std::vector<variable_status_t> new_vstatus(lp.num_cols - slacks_to_remove.size());
+    std::vector<i_t> new_basic_list;
+    new_basic_list.reserve(lp.num_rows - slacks_to_remove.size());
+    std::vector<i_t> new_nonbasic_list;
+    new_nonbasic_list.reserve(nonbasic_list.size());
+    std::vector<f_t> new_solution_x(lp.num_cols - slacks_to_remove.size());
+    std::vector<f_t> new_solution_z(lp.num_cols - slacks_to_remove.size());
+    h = 0;
+    for (i_t k = 0; k < lp.num_cols; k++) {
+      if (!marked_cols[k]) {
+        new_objective[h]  = lp.objective[k];
+        new_lower[h]      = lp.lower[k];
+        new_upper[h]      = lp.upper[k];
+        new_var_types[h]  = var_types[k];
+        new_vstatus[h]    = vstatus[k];
+        new_solution_x[h] = x[k];
+        new_solution_z[h] = z[k];
+        if (new_vstatus[h] != variable_status_t::BASIC) {
+          new_nonbasic_list.push_back(h);
+        } else {
+          new_basic_list.push_back(h);
+        }
+        h++;
+      }
+    }
+    lp.A.remove_columns(marked_cols);
+    lp.A.transpose(Arow);
+    lp.objective  = new_objective;
+    lp.lower      = new_lower;
+    lp.upper      = new_upper;
+    lp.rhs        = new_rhs;
+    var_types     = new_var_types;
+    lp.num_cols   = lp.A.n;
+    lp.num_rows   = lp.A.m;
+    basic_list    = new_basic_list;
+    nonbasic_list = new_nonbasic_list;
+    vstatus       = new_vstatus;
+    x             = new_solution_x;
+    y             = new_solution_y;
+    z             = new_solution_z;
+
+    settings.log.printf("After removal %d rows %d columns %d nonzeros\n",
+                        lp.num_rows,
+                        lp.num_cols,
+                        lp.A.col_start[lp.A.n]);
+
+    basis_update.resize(lp.num_rows);
+    basis_update.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus);
+  }
+}
+
+
+#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
+template class cut_pool_t<int, double>;
+template class cut_generation_t<int, double>;
+template class mixed_integer_gomory_base_inequality_t<int, double>;
+template class mixed_integer_rounding_cut_t<int, double>;
+// Explicit instantiation of add_cuts for i_t = int, f_t = double
+template
+int add_cuts(const simplex_solver_settings_t<int, double>& settings,
+              const csr_matrix_t<int, double>& cuts,
+              const std::vector<double>& cut_rhs,
+              lp_problem_t<int, double>& lp,
+              lp_solution_t<int, double>& solution,
+              basis_update_mpf_t<int, double>& basis_update,
+              std::vector<int>& basic_list,
+              std::vector<int>& nonbasic_list,
+              std::vector<variable_status_t>& vstatus,
+              std::vector<double>& edge_norms);
+// Explicit instantiation of remove_cuts for i_t = int, f_t = double
+template
+void remove_cuts<int, double>(lp_problem_t<int, double>& lp,
+                 const simplex_solver_settings_t<int, double>& settings,
+                 csc_matrix_t<int, double>& Arow,
+                 int original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<double>& x,
+                 std::vector<double>& y,
+                 std::vector<double>& z,
+                 std::vector<int>& basic_list,
+                 std::vector<int>& nonbasic_list,
+                 basis_update_mpf_t<int, double>& basis_update);
+#endif
+
+} // namespace cuopt::linear_programming::dual_simplex
+
+
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
new file mode 100644
index 000000000..14b6d0e1f
--- /dev/null
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -0,0 +1,221 @@
+/* clang-format off */
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+/* clang-format on */
+#pragma once
+
+#include <dual_simplex/basis_updates.hpp>
+#include <dual_simplex/presolve.hpp>
+#include <dual_simplex/simplex_solver_settings.hpp>
+#include <dual_simplex/sparse_vector.hpp>
+#include <dual_simplex/types.hpp>
+#include <dual_simplex/user_problem.hpp>
+
+
+#include <cmath>
+
+namespace cuopt::linear_programming::dual_simplex {
+
+// Returns min over rows k of (C*x)[k] - cut_rhs[k]; positive iff x violates every cut C*x <= d.
+template <typename i_t, typename f_t>
+f_t minimum_violation(const csr_matrix_t<i_t, f_t>& C,
+                      const std::vector<f_t>& cut_rhs,
+                      const std::vector<f_t>& x)
+{
+  std::vector<f_t> Cx(C.m);
+  csc_matrix_t<i_t, f_t> C_col(C.m, C.n, 0);
+  C.to_compressed_col(C_col);
+  matrix_vector_multiply(C_col, 1.0, x, 0.0, Cx);
+  f_t min_cut_violation = inf;
+  for (i_t k = 0; k < static_cast<i_t>(Cx.size()); k++) {
+    if (Cx[k] <= cut_rhs[k]) {
+      printf("C*x <= d for cut %d. C*x %e rhs %e\n", k, Cx[k], cut_rhs[k]);
+    }
+    min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]);
+  }
+  return min_cut_violation;
+}
+
+// Pool of cuts; pool_size() reports the number of rows of cut_storage_.
+// settings is held by reference, so it must outlive the pool.
+template <typename i_t, typename f_t>
+class cut_pool_t {
+ public:
+  cut_pool_t(i_t original_vars, const simplex_solver_settings_t<i_t, f_t>& settings)
+    : original_vars_(original_vars),
+      settings_(settings),
+      cut_storage_(0, original_vars, 0),
+      rhs_storage_(0),
+      cut_age_(0),
+      scored_cuts_(0)
+  {
+  }
+
+  // Add a cut in the form: cut'*x >= rhs.
+  // We expect that the cut is violated by the current relaxation
+  // cut'*xstart < rhs
+  void add_cut(i_t n, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
+
+  void score_cuts(std::vector<f_t>& x_relax);
+
+  // We return the cuts in the form best_cuts*x <= best_rhs
+  i_t get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs);
+
+  void age_cuts();
+  void drop_cuts();
+  i_t pool_size() const { return cut_storage_.m; }
+
+ private:
+  f_t cut_distance(i_t row, const std::vector<f_t>& x, f_t& cut_violation, f_t &cut_norm);
+  f_t cut_density(i_t row);
+  f_t cut_orthogonality(i_t i, i_t j);
+
+  i_t original_vars_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;
+
+  csr_matrix_t<i_t, f_t> cut_storage_;
+  std::vector<f_t> rhs_storage_;
+  std::vector<i_t> cut_age_;
+
+  i_t scored_cuts_;
+  std::vector<f_t> cut_distances_;
+  std::vector<f_t> cut_norms_;
+  std::vector<f_t> cut_orthogonality_;
+  std::vector<f_t> cut_scores_;
+  std::vector<i_t> best_cuts_;
+};
+
+// Cut generator bound to a cut_pool_t. The pool is held by reference, so it must
+// outlive this object.
+template <typename i_t, typename f_t>
+class cut_generation_t {
+ public:
+  cut_generation_t(cut_pool_t<i_t, f_t>& cut_pool) : cut_pool_(cut_pool) {}
+
+  void generate_cuts(const lp_problem_t<i_t, f_t>& lp,
+                     const simplex_solver_settings_t<i_t, f_t>& settings,
+                     csc_matrix_t<i_t, f_t>& Arow,
+                     const std::vector<variable_type_t>& var_types,
+                     basis_update_mpf_t<i_t, f_t>& basis_update,
+                     const std::vector<f_t>& xstar,
+                     const std::vector<i_t>& basic_list,
+                     const std::vector<i_t>& nonbasic_list);
+ private:
+  void generate_gomory_cuts(const lp_problem_t<i_t, f_t>& lp,
+                            const simplex_solver_settings_t<i_t, f_t>& settings,
+                            csc_matrix_t<i_t, f_t>& Arow,
+                            const std::vector<variable_type_t>& var_types,
+                            basis_update_mpf_t<i_t, f_t>& basis_update,
+                            const std::vector<f_t>& xstar,
+                            const std::vector<i_t>& basic_list,
+                            const std::vector<i_t>& nonbasic_list);
+
+  void generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
+                         const simplex_solver_settings_t<i_t, f_t>& settings,
+                         csc_matrix_t<i_t, f_t>& Arow,
+                         const std::vector<variable_type_t>& var_types,
+                         const std::vector<f_t>& xstar);
+  cut_pool_t<i_t, f_t>& cut_pool_;
+};
+
+// Per-basis state for Gomory base inequalities: the constructor fills b_bar_ via
+// basis_update.b_solve(lp.rhs, b_bar_) and sets nonbasic_mark_[j] = 1 for nonbasic j.
+template <typename i_t, typename f_t>
+class mixed_integer_gomory_base_inequality_t {
+ public:
+  mixed_integer_gomory_base_inequality_t(const lp_problem_t<i_t, f_t>& lp,
+                                         basis_update_mpf_t<i_t, f_t>& basis_update,
+                                         const std::vector<i_t>& nonbasic_list)
+    : b_bar_(lp.num_rows, 0.0),
+      nonbasic_mark_(lp.num_cols, 0),
+      x_workspace_(lp.num_cols, 0.0),
+      x_mark_(lp.num_cols, 0)
+  {
+    basis_update.b_solve(lp.rhs, b_bar_);
+    for (i_t j : nonbasic_list) { nonbasic_mark_[j] = 1; }
+  }
+
+  // Generates the base inequalities: C*x == d that will be turned into cuts
+  i_t generate_base_inequality(const lp_problem_t<i_t, f_t>& lp,
+                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                               csc_matrix_t<i_t, f_t>& Arow,
+                               const std::vector<variable_type_t>& var_types,
+                               basis_update_mpf_t<i_t, f_t>& basis_update,
+                               const std::vector<f_t>& xstar,
+                               const std::vector<i_t>& basic_list,
+                               const std::vector<i_t>& nonbasic_list,
+                               i_t i,
+                               sparse_vector_t<i_t, f_t>& inequality,
+                               f_t& inequality_rhs);
+
+ private:
+  std::vector<f_t> b_bar_;
+  std::vector<f_t> nonbasic_mark_;
+  std::vector<f_t> x_workspace_;
+  std::vector<i_t> x_mark_;
+};
+
+// State for mixed-integer rounding cuts over num_vars variables; settings must outlive it.
+template <typename i_t, typename f_t>
+class mixed_integer_rounding_cut_t {
+ public:
+  mixed_integer_rounding_cut_t(i_t num_vars, const simplex_solver_settings_t<i_t, f_t>& settings)
+    : num_vars_(num_vars),
+      settings_(settings),
+      x_workspace_(num_vars, 0.0),
+      x_mark_(num_vars, 0),
+      has_lower_(num_vars, 0),
+      has_upper_(num_vars, 0),
+      needs_complement_(false)
+  {
+  }
+
+  void initialize(const lp_problem_t<i_t, f_t>& lp, const std::vector<f_t>& xstar);
+  i_t generate_cut(const sparse_vector_t<i_t, f_t>& a,
+                   f_t beta,
+                   const std::vector<f_t>& upper_bounds,
+                   const std::vector<f_t>& lower_bounds,
+                   const std::vector<variable_type_t>& var_types,
+                   sparse_vector_t<i_t, f_t>& cut,
+                   f_t& cut_rhs);
+
+ private:
+  i_t num_vars_;
+  const simplex_solver_settings_t<i_t, f_t>& settings_;
+  std::vector<f_t> x_workspace_;
+  std::vector<i_t> x_mark_;
+  std::vector<i_t> has_lower_;
+  std::vector<i_t> has_upper_;
+  bool needs_complement_;
+};
+
+template <typename i_t, typename f_t>
+i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
+             const csr_matrix_t<i_t, f_t>& cuts,
+             const std::vector<f_t>& cut_rhs,
+             lp_problem_t<i_t, f_t>& lp,
+             lp_solution_t<i_t, f_t>& solution,
+             basis_update_mpf_t<i_t, f_t>& basis_update,
+             std::vector<i_t>& basic_list,
+             std::vector<i_t>& nonbasic_list,
+             std::vector<variable_status_t>& vstatus,
+             std::vector<f_t>& edge_norms);
+
+template <typename i_t, typename f_t>
+void remove_cuts(lp_problem_t<i_t, f_t>& lp,
+                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                 csc_matrix_t<i_t, f_t>& Arow,
+                 i_t original_rows,
+                 std::vector<variable_type_t>& var_types,
+                 std::vector<variable_status_t>& vstatus,
+                 std::vector<f_t>& x,
+                 std::vector<f_t>& y,
+                 std::vector<f_t>& z,
+                 std::vector<i_t>& basic_list,
+                 std::vector<i_t>& nonbasic_list,
+                 basis_update_mpf_t<i_t, f_t>& basis_update);
+
+}
+
diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp
index fcd8a6386..951945092 100644
--- a/cpp/src/dual_simplex/solve.cpp
+++ b/cpp/src/dual_simplex/solve.cpp
@@ -294,212 +294,6 @@ lp_status_t solve_linear_program_with_advanced_basis(
   return lp_status;
 }
 
-template <typename i_t, typename f_t>
-lp_status_t solve_linear_program_with_cuts(const f_t start_time,
-                                           const simplex_solver_settings_t<i_t, f_t>& settings,
-                                           const csr_matrix_t<i_t, f_t>& cuts,
-                                           const std::vector<f_t>& cut_rhs,
-                                           lp_problem_t<i_t, f_t>& lp,
-                                           lp_solution_t<i_t, f_t>& solution,
-                                           basis_update_mpf_t<i_t, f_t>& basis_update,
-                                           std::vector<i_t>& basic_list,
-                                           std::vector<i_t>& nonbasic_list,
-                                           std::vector<variable_status_t>& vstatus,
-                                           std::vector<f_t>& edge_norms)
-{
-  // Given a set of cuts: C*x <= d that are currently violated
-  // by the current solution x* (i.e. C*x* > d), this function
-  // adds the cuts into the LP and solves again.
-
-
-#ifdef CHECK_BASIS
-  {
-    csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
-    basis_update.multiply_lu(Btest);
-    csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
-    form_b(lp.A, basic_list, B);
-    csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
-    add(Btest, B, 1.0, -1.0, Diff);
-    const f_t err = Diff.norm1();
-    settings.log.printf("Before || B - L*U || %e\n", err);
-    if (err > 1e-6) {
-      exit(1);
-    }
-  }
-#endif
-
-  const i_t p = cuts.m;
-  if (cut_rhs.size() != static_cast<size_t>(p)) {
-    settings.log.printf("cut_rhs must have the same number of rows as cuts\n");
-    return lp_status_t::NUMERICAL_ISSUES;
-  }
-  settings.log.printf("Number of cuts %d\n", p);
-  settings.log.printf("Original lp rows %d\n", lp.num_rows);
-  settings.log.printf("Original lp cols %d\n", lp.num_cols);
-
-  csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
-  lp.A.to_compressed_row(new_A_row);
-
-  new_A_row.append_rows(cuts);
-
-  csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
-  new_A_row.to_compressed_col(new_A_col);
-
-  // Add in slacks variables for the new rows
-  lp.lower.resize(lp.num_cols + p);
-  lp.upper.resize(lp.num_cols + p);
-  lp.objective.resize(lp.num_cols + p);
-  i_t nz = new_A_col.col_start[lp.num_cols];
-  new_A_col.col_start.resize(lp.num_cols + p + 1);
-  new_A_col.i.resize(nz + p);
-  new_A_col.x.resize(nz + p);
-  i_t k = lp.num_rows;
-  for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) {
-    new_A_col.col_start[j] = nz;
-    new_A_col.i[nz]        = k++;
-    new_A_col.x[nz]        = 1.0;
-    nz++;
-    lp.lower[j]     = 0.0;
-    lp.upper[j]     = inf;
-    lp.objective[j] = 0.0;
-  }
-  settings.log.printf("Done adding slacks\n");
-  new_A_col.col_start[lp.num_cols + p] = nz;
-  new_A_col.n                          = lp.num_cols + p;
-
-  lp.A         = new_A_col;
-  i_t old_rows = lp.num_rows;
-  lp.num_rows += p;
-  i_t old_cols = lp.num_cols;
-  lp.num_cols += p;
-
-
-  lp.rhs.resize(lp.num_rows);
-  for (i_t k = old_rows; k < old_rows + p; k++) {
-    const i_t h = k - old_rows;
-    lp.rhs[k]   = cut_rhs[h];
-  }
-  settings.log.printf("Done adding rhs\n");
-
-  // Construct C_B = C(:, basic_list)
-  std::vector<i_t> C_col_degree(lp.num_cols, 0);
-  i_t cuts_nz = cuts.row_start[p];
-  for (i_t q = 0; q < cuts_nz; q++) {
-    const i_t j = cuts.j[q];
-    if (j >= lp.num_cols) {
-      settings.log.printf("j %d is greater than p %d\n", j, p);
-      exit(1);
-    }
-    C_col_degree[j]++;
-  }
-  settings.log.printf("Done computing C_col_degree\n");
-
-  std::vector<i_t> in_basis(old_cols, -1);
-  const i_t num_basic = static_cast<i_t>(basic_list.size());
-  i_t C_B_nz          = 0;
-  for (i_t k = 0; k < num_basic; k++) {
-    const i_t j = basic_list[k];
-    if (j < 0 || j >= old_cols) {
-      settings.log.printf("basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols);
-      exit(1);
-    }
-    in_basis[j] = k;
-    if (j < cuts.n)
-    {
-      C_B_nz += C_col_degree[j];
-    }
-  }
-  settings.log.printf("Done estimating C_B_nz\n");
-
-  csr_matrix_t<i_t, f_t> C_B(p, num_basic, C_B_nz);
-  nz = 0;
-  for (i_t i = 0; i < p; i++) {
-    C_B.row_start[i]    = nz;
-    const i_t row_start = cuts.row_start[i];
-    const i_t row_end   = cuts.row_start[i + 1];
-    for (i_t q = row_start; q < row_end; q++) {
-      const i_t j = cuts.j[q];
-      const i_t j_basis = in_basis[j];
-      if (j_basis == -1) { continue; }
-      C_B.j[nz] = j_basis;
-      C_B.x[nz] = cuts.x[q];
-      nz++;
-    }
-  }
-  C_B.row_start[p] = nz;
-  settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
-  if (nz != C_B_nz) { exit(1); return lp_status_t::NUMERICAL_ISSUES; }
-  settings.log.printf("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz);
-
-
-  // Adjust the basis update to include the new cuts
-  basis_update.append_cuts(C_B);
-
-  basic_list.resize(lp.num_rows, 0);
-  i_t h = old_cols;
-  for (i_t j = old_rows; j < lp.num_rows; j++) {
-    basic_list[j] = h++;
-  }
-
-#ifdef CHECK_BASIS
-  // Check the basis update
-  csc_matrix_t<i_t, f_t> Btest(lp.num_rows, lp.num_rows, 1);
-  basis_update.multiply_lu(Btest);
-
-  csc_matrix_t<i_t, f_t> B(lp.num_rows, lp.num_rows, 1);
-  form_b(lp.A, basic_list, B);
-
-  csc_matrix_t<i_t, f_t> Diff(lp.num_rows, lp.num_rows, 1);
-  add(Btest, B, 1.0, -1.0, Diff);
-  const f_t err = Diff.norm1();
-  settings.log.printf("After || B - L*U || %e\n", err);
-  if (err > 1e-6) {
-    settings.log.printf("Diff matrix\n");
-    //Diff.print_matrix();
-      exit(1);
-  }
-#endif
-  // Adjust the vstatus
-  vstatus.resize(lp.num_cols);
-  for (i_t j = old_cols; j < lp.num_cols; j++) {
-    vstatus[j] = variable_status_t::BASIC;
-  }
-
-  // Adjust the solution
-  solution.x.resize(lp.num_cols, 0.0);
-  solution.y.resize(lp.num_rows, 0.0);
-  solution.z.resize(lp.num_cols, 0.0);
-
-  // For now just clear the edge norms
-  edge_norms.clear();
-  i_t iter              = 0;
-  bool initialize_basis = false;
-  dual::status_t status = dual_phase2_with_advanced_basis(2,
-                                                          0,
-                                                          initialize_basis,
-                                                          start_time,
-                                                          lp,
-                                                          settings,
-                                                          vstatus,
-                                                          basis_update,
-                                                          basic_list,
-                                                          nonbasic_list,
-                                                          solution,
-                                                          iter,
-                                                          edge_norms);
-  settings.log.printf("Phase 2 iterations %d\n", iter);
-  solution.iterations = iter;
-  lp_status_t lp_status;
-  if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; }
-  if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; }
-  if (status == dual::status_t::TIME_LIMIT) { lp_status = lp_status_t::TIME_LIMIT; }
-  if (status == dual::status_t::ITERATION_LIMIT) { lp_status = lp_status_t::ITERATION_LIMIT; }
-  if (status == dual::status_t::CONCURRENT_LIMIT) { lp_status = lp_status_t::CONCURRENT_LIMIT; }
-  if (status == dual::status_t::NUMERICAL) { lp_status = lp_status_t::NUMERICAL_ISSUES; }
-  if (status == dual::status_t::CUTOFF) { lp_status = lp_status_t::CUTOFF; }
-  return lp_status;
-}
-
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
                                               const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -868,19 +662,6 @@ template lp_status_t solve_linear_program_with_advanced_basis(
   std::vector<variable_status_t>& vstatus,
   std::vector<double>& edge_norms);
 
-template lp_status_t solve_linear_program_with_cuts(
-  const double start_time,
-  const simplex_solver_settings_t<int, double>& settings,
-  const csr_matrix_t<int, double>& cuts,
-  const std::vector<double>& cut_rhs,
-  lp_problem_t<int, double>& lp,
-  lp_solution_t<int, double>& solution,
-  basis_update_mpf_t<int, double>& basis_update,
-  std::vector<int>& basic_list,
-  std::vector<int>& nonbasic_list,
-  std::vector<variable_status_t>& vstatus,
-  std::vector<double>& edge_norms);
-
 template lp_status_t solve_linear_program_with_barrier(
   const user_problem_t<int, double>& user_problem,
   const simplex_solver_settings_t<int, double>& settings,
diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp
index d659d6282..e96229784 100644
--- a/cpp/src/dual_simplex/solve.hpp
+++ b/cpp/src/dual_simplex/solve.hpp
@@ -61,19 +61,6 @@ lp_status_t solve_linear_program_with_advanced_basis(
   std::vector<variable_status_t>& vstatus,
   std::vector<f_t>& edge_norms);
 
-template <typename i_t, typename f_t>
-lp_status_t solve_linear_program_with_cuts(const f_t start_time,
-                                           const simplex_solver_settings_t<i_t, f_t>& settings,
-                                           const csr_matrix_t<i_t, f_t>& cuts,
-                                           const std::vector<f_t>& cut_rhs,
-                                           lp_problem_t<i_t, f_t>& lp,
-                                           lp_solution_t<i_t, f_t>& solution,
-                                           basis_update_mpf_t<i_t, f_t>& basis_update,
-                                           std::vector<i_t>& basic_list,
-                                           std::vector<i_t>& nonbasic_list,
-                                           std::vector<variable_status_t>& vstatus,
-                                           std::vector<f_t>& edge_norms);
-
 template <typename i_t, typename f_t>
 lp_status_t solve_linear_program_with_barrier(const user_problem_t<i_t, f_t>& user_problem,
                                               const simplex_solver_settings_t<i_t, f_t>& settings,
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 0da4f90e9..8398065a7 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -403,7 +403,6 @@ template <typename i_t, typename f_t>
 i_t csr_matrix_t<i_t, f_t>::append_row(const sparse_vector_t<i_t, f_t>& c)
 {
   const i_t old_m = this->m;
-  const i_t n = this->n;
   const i_t old_nz = this->row_start[old_m];
   const i_t c_nz = c.i.size();
   const i_t new_nz = old_nz + c_nz;
@@ -625,6 +624,7 @@ void csr_matrix_t<i_t, f_t>::check_matrix() const
     const i_t row_end   = this->row_start[i + 1];
     for (i_t p = row_start; p < row_end; ++p) {
       const i_t j = this->j[p];
+      if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); }
       if (col_marker[j] == i) { printf("CSR Error: repeated column index %d in row %d\n", j, i); }
       col_marker[j] = i;
     }
diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu
index 78f2b9fa0..cbc59b2ab 100644
--- a/cpp/src/mip/diversity/diversity_manager.cu
+++ b/cpp/src/mip/diversity/diversity_manager.cu
@@ -409,7 +409,7 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
     run_fj_alone(sol);
     return sol;
   }
-  rins.enable();
+  //rins.enable();
 
   generate_solution(timer.remaining_time(), false);
   if (timer.check_time_limit()) {
diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu
index 1efc971b2..23522f07d 100644
--- a/cpp/src/mip/diversity/lns/rins.cu
+++ b/cpp/src/mip/diversity/lns/rins.cu
@@ -245,6 +245,7 @@ void rins_t<i_t, f_t>::run_rins()
   branch_and_bound_settings.num_bfs_threads    = 1;
   branch_and_bound_settings.num_diving_threads = 1;
   branch_and_bound_settings.log.log_prefix     = "[RINS] ";
+  branch_and_bound_settings.max_cut_passes     = 0;
   branch_and_bound_settings.solution_callback  = [this, &rins_solution_queue](
                                                   std::vector<f_t>& solution, f_t objective) {
     rins_solution_queue.push_back(solution);
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index 68fb0c698..cc9f9f6c5 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -226,8 +226,8 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
                                                 std::ref(branch_and_bound_solution));
   }
 
-  auto bb_status = branch_and_bound_status_future.get();
-  exit(1);
+  //auto bb_status = branch_and_bound_status_future.get();
+  //CUOPT_LOG_INFO("BB status: %d", bb_status);
 
   // Start the primal heuristics
   auto sol = dm.run_solver();
diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp
index cd66e63f1..66a2347d1 100644
--- a/cpp/tests/dual_simplex/unit_tests/solve.cpp
+++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp
@@ -326,6 +326,7 @@ TEST(dual_simplex, dual_variable_greater_than)
   EXPECT_NEAR(solution.z[1], 0.0, 1e-6);
 }
 
+#if 0
 TEST(dual_simplex, simple_cuts)
 {
   // minimize x + y + 2 z
@@ -421,6 +422,8 @@ TEST(dual_simplex, simple_cuts)
   printf("cuts m %d n %d\n", cuts.m, cuts.n);
   std::vector<double> cut_rhs(1);
   cut_rhs[0] = -1.0 / 3.0;
+
+  std::vector<variable_type_t> var_types;
   EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time,
                                                                                     settings,
                                                                                     cuts,
@@ -431,7 +434,8 @@ TEST(dual_simplex, simple_cuts)
                                                                                     basic_list,
                                                                                     nonbasic_list,
                                                                                     vstatus,
-                                                                                    edge_norms),
+                                                                                    edge_norms,
+                                                                                    var_types),
             cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
@@ -460,7 +464,8 @@ TEST(dual_simplex, simple_cuts)
                                                                                     basic_list,
                                                                                     nonbasic_list,
                                                                                     vstatus,
-                                                                                    edge_norms),
+                                                                                    edge_norms,
+                                                                                    var_types),
             cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL);
   printf("Solution objective: %e\n", solution.objective);
   printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]);
@@ -470,5 +475,6 @@ TEST(dual_simplex, simple_cuts)
   EXPECT_NEAR(solution.x[2], 1.0 / 3.0, 1e-6);
 
 }
+#endif
 
 }  // namespace cuopt::linear_programming::dual_simplex::test

From 369e75512cbbf06d16c8ba263b9fd8438f0e53b6 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 19 Dec 2025 15:49:11 -0800
Subject: [PATCH 16/27] Only perform cuts on the original variables. Substitute
 out slack variables

---
 cpp/src/dual_simplex/branch_and_bound.cpp |  33 ++--
 cpp/src/dual_simplex/cuts.cpp             | 175 +++++++++++++++++-----
 cpp/src/dual_simplex/cuts.hpp             |  18 ++-
 cpp/src/dual_simplex/sparse_matrix.cpp    |   5 +
 cpp/src/dual_simplex/sparse_vector.cpp    |  11 ++
 cpp/src/dual_simplex/sparse_vector.hpp    |   2 +
 6 files changed, 194 insertions(+), 50 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 945d84215..6f6917fec 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -306,7 +306,7 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
       std::string gap = user_mip_gap<f_t>(user_obj, user_lower);
 
       settings_.log.printf(
-        "H                                %+13.6e    %+10.6e                        %s %9.2f\n",
+        "H                           %+13.6e    %+10.6e                               %s %9.2f\n",
         user_obj,
         user_lower,
         gap.c_str(),
@@ -1139,6 +1139,9 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   csc_matrix_t<i_t, f_t> Arow(1, 1, 1);
   original_lp_.A.transpose(Arow);
 
+  status_                     = mip_exploration_status_t::RUNNING;
+  lower_bound_ceiling_        = inf;
+
   if (num_fractional != 0) {
     settings_.log.printf(
       " | Explored | Unexplored |    Objective    |     Bound     | IntInf | Depth | Iter/Node |   Gap    "
@@ -1187,7 +1190,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 #endif
 
       // Generate cuts and add them to the cut pool
-      cut_generation.generate_cuts(original_lp_, settings_, Arow, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
+      cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
 
       // Score the cuts
       cut_pool.score_cuts(root_relax_soln_.x);
@@ -1204,7 +1207,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 #endif
 
       // Resolve the LP with the new cuts
-      settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+      settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
                            num_cuts,
                            cuts_to_add.row_start[cuts_to_add.m],
                            cut_pool.pool_size(),
@@ -1216,6 +1219,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                                      cuts_to_add,
                                      cut_rhs,
                                      original_lp_,
+                                     new_slacks_,
                                      root_relax_soln_,
                                      basis_update,
                                      basic_list,
@@ -1233,12 +1237,11 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       std::vector<bool> bounds_changed(original_lp_.num_cols, true);
       std::vector<char> row_sense;
-
+#ifdef CHECK_MATRICES
       settings_.log.printf("Before A check\n");
       original_lp_.A.check_matrix();
-      settings_.log.printf("Before A transpose\n");
+#endif
       original_lp_.A.transpose(Arow);
-      settings_.log.printf("After A transpose\n");
       bool feasible =
         bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed);
 
@@ -1270,7 +1273,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                                                                   iter,
                                                                   edge_norms_);
 
-      settings_.log.printf("Cut LP iterations %d. A nz %d\n",
+      settings_.log.debug("Cut LP iterations %d. A nz %d\n",
                            iter,
                            original_lp_.A.col_start[original_lp_.A.n]);
       stats_.total_lp_iters += root_relax_soln_.iterations;
@@ -1286,6 +1289,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       remove_cuts(original_lp_,
                   settings_,
                   Arow,
+                  new_slacks_,
                   original_rows,
                   var_types_,
                   root_vstatus_,
@@ -1300,14 +1304,18 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
 
       // TODO: Get upper bound from heuristics
-      std::string gap = num_fractional != 0 ? "  -  " : "0.0%";
-      f_t obj = num_fractional != 0 ? inf : compute_user_objective(original_lp_, root_objective_);
+      f_t upper_bound = get_upper_bound();
+      f_t obj         = num_fractional != 0 ? upper_bound : root_objective_;  // LP space; converted to user space once below
+      f_t user_obj    = compute_user_objective(original_lp_, obj);
+      f_t user_lower  = compute_user_objective(original_lp_, root_objective_);
+      std::string gap = num_fractional != 0 ? user_mip_gap<f_t>(user_obj, user_lower) : "0.0%";
+
 
       settings_.log.printf(" %10d   %10lu    %+13.6e    %+10.6e   %6d %6d   %7.1e     %s %9.2f\n",
         0,
         0,
-        obj,
-        compute_user_objective(original_lp_, root_objective_),
+        user_obj,
+        user_lower,
         num_fractional,
         0,
         stats_.total_lp_iters.load(),
@@ -1361,8 +1369,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   stats_.last_log             = tic();
   active_subtrees_            = 0;
   min_diving_queue_size_      = 4 * settings_.num_diving_threads;
-  status_                     = mip_exploration_status_t::RUNNING;
-  lower_bound_ceiling_        = inf;
+
 
 #pragma omp parallel num_threads(settings_.num_threads)
   {
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 4b8fc94dd..8fbf1a275 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -12,14 +12,10 @@ namespace cuopt::linear_programming::dual_simplex {
 
 
 template <typename i_t, typename f_t>
-void cut_pool_t<i_t, f_t>::add_cut(i_t n, const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
+void cut_pool_t<i_t, f_t>::add_cut(const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
 {
   // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool
 
-  if (n > cut_storage_.n) {
-    cut_storage_.n = n;
-  }
-
   for (i_t p = 0; p < cut.i.size(); p++) {
     const i_t j = cut.i[p];
     if (j >= original_vars_) {
@@ -153,7 +149,7 @@ template <typename i_t, typename f_t>
 i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs)
 {
   best_cuts.m = 0;
-  best_cuts.n = cut_storage_.n;
+  best_cuts.n = original_vars_;
   best_cuts.row_start.clear();
   best_cuts.j.clear();
   best_cuts.x.clear();
@@ -190,6 +186,7 @@ template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
                                                const simplex_solver_settings_t<i_t, f_t>& settings,
                                                csc_matrix_t<i_t, f_t>& Arow,
+                                               const std::vector<i_t>& new_slacks,
                                                const std::vector<variable_type_t>& var_types,
                                                basis_update_mpf_t<i_t, f_t>& basis_update,
                                                const std::vector<f_t>& xstar,
@@ -198,7 +195,7 @@ void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
 {
   // Generate Gomory Cuts
   generate_gomory_cuts(
-    lp, settings, Arow, var_types, basis_update, xstar, basic_list, nonbasic_list);
+    lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
 
 
  // Generate MIR cuts
@@ -209,11 +206,12 @@ template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
                                                    const simplex_solver_settings_t<i_t, f_t>& settings,
                                                    csc_matrix_t<i_t, f_t>& Arow,
+                                                   const std::vector<i_t>& new_slacks,
                                                    const std::vector<variable_type_t>& var_types,
                                                    const std::vector<f_t>& xstar)
 {
   mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
-  mir.initialize(lp, xstar);
+  mir.initialize(lp, new_slacks, xstar);
 
   for (i_t i = 0; i < lp.num_rows; i++) {
     sparse_vector_t<i_t, f_t> inequality(Arow, i);
@@ -262,16 +260,18 @@ void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>&
         }
 
         settings.log.printf("Adding MIR cut %d\n", i);
-        cut_pool_.add_cut(lp.num_cols, cut, cut_rhs);
+        cut_pool_.add_cut(cut, cut_rhs);
     }
   }
 }
 
+
 template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
   const lp_problem_t<i_t, f_t>& lp,
   const simplex_solver_settings_t<i_t, f_t>& settings,
   csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
   const std::vector<variable_type_t>& var_types,
   basis_update_mpf_t<i_t, f_t>& basis_update,
   const std::vector<f_t>& xstar,
@@ -281,7 +281,7 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
   mixed_integer_gomory_base_inequality_t<i_t, f_t> gomory(lp, basis_update, nonbasic_list);
   mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
 
-  mir.initialize(lp, xstar);
+  mir.initialize(lp, new_slacks, xstar);
 
   for (i_t i = 0; i < lp.num_rows; i++) {
     sparse_vector_t<i_t, f_t> inequality(lp.num_cols, 0);
@@ -310,15 +310,10 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       bool A_valid = false;
       f_t cut_A_distance = 0.0;
       if (mir_status == 0) {
+        mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs);
         // Check that the cut is violated
-        f_t dot = 0.0;
-        f_t cut_norm = 0.0;
-        for (i_t k = 0; k < cut_A.i.size(); k++) {
-          const i_t jj = cut_A.i[k];
-          const f_t aj = cut_A.x[k];
-          dot += aj * xstar[jj];
-          cut_norm += aj * aj;
-        }
+        f_t dot = cut_A.dot(xstar);
+        f_t cut_norm = cut_A.norm2_squared();
         if (dot >= cut_A_rhs) {
           settings.log.printf("Cut %d is not violated. Skipping\n", i);
           continue;
@@ -340,15 +335,10 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       bool B_valid = false;
       f_t cut_B_distance = 0.0;
       if (mir_status == 0) {
+        mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs);
         // Check that the cut is violated
-        f_t dot = 0.0;
-        f_t cut_norm = 0.0;
-        for (i_t k = 0; k < cut_B.i.size(); k++) {
-          const i_t jj = cut_B.i[k];
-          const f_t aj = cut_B.x[k];
-          dot += aj * xstar[jj];
-          cut_norm += aj * aj;
-        }
+        f_t dot = cut_B.dot(xstar);
+        f_t cut_norm = cut_B.norm2_squared();
         if (dot >= cut_B_rhs) {
           settings.log.printf("Cut %d is not violated. Skipping\n", i);
           continue;
@@ -359,9 +349,9 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       }
 
       if ((cut_A_distance > cut_B_distance) && A_valid) {
-        cut_pool_.add_cut(lp.num_cols, cut_A, cut_A_rhs);
+        cut_pool_.add_cut(cut_A, cut_A_rhs);
       } else if (B_valid) {
-        cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs);
+        cut_pool_.add_cut(cut_B, cut_B_rhs);
       }
     }
   }
@@ -526,7 +516,8 @@ i_t mixed_integer_gomory_base_inequality_t<i_t, f_t>::generate_base_inequality(
 
 template <typename i_t, typename f_t>
 void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t, f_t>& lp,
-                                              const std::vector<f_t>& xstar)
+                                                        const std::vector<i_t>& new_slacks,
+                                                        const std::vector<f_t>& xstar)
 {
 
   if (lp.num_cols != num_vars_) {
@@ -537,6 +528,17 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
     has_upper_.resize(num_vars_, 0);
   }
 
+  is_slack_.clear();
+  is_slack_.resize(num_vars_, 0);
+  slack_rows_.clear();
+  slack_rows_.resize(num_vars_, 0);
+
+  for (i_t j : new_slacks) {
+    is_slack_[j] = 1;
+    const i_t col_start = lp.A.col_start[j];
+    const i_t i = lp.A.i[col_start];
+    slack_rows_[j] = i;
+  }
 
   needs_complement_ = false;
   for (i_t j = 0; j < lp.num_cols; j++) {
@@ -721,11 +723,92 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   return 0;
 }
 
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                                                               csc_matrix_t<i_t, f_t>& Arow,
+                                                               sparse_vector_t<i_t, f_t>& cut,
+                                                               f_t& cut_rhs)
+{
+  // Remove slacks from the cut so that it is only over the original variables.
+  // Uses x_workspace_ / x_mark_ as scratch; both are reset before returning.
+  bool found_slack = false;
+  i_t cut_nz = 0;
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(cut.i.size());
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j  = cut.i[k];
+    const f_t cj = cut.x[k];
+    if (is_slack_[j]) {
+      found_slack = true;
+      // Do the substitution
+      // Slack variable s_j participates in row i of the constraint matrix
+      // Row i is of the form:
+      // sum_{k != j} A(i, k) * x_k + A(i, j) * s_j = rhs_i
+      // So, taking A(i, j) = 1 (NOTE(review): assumed, not checked here), we have
+      // s_j = rhs_i - sum_{k != j} A(i, k) * x_k
+
+      // Our cut is of the form:
+      // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs
+      // So the cut becomes
+      // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{k != j} A(i, k) * x_k) >= cut_rhs
+      // This is equivalent to:
+      // sum_{k != j} C(k) * x_k + sum_{k != j} -C(j) * A(i, k) * x_k >= cut_rhs - C(j) * rhs_i
+      const i_t i         = slack_rows_[j];
+      cut_rhs -= cj * lp.rhs[i];
+      const i_t row_start = Arow.col_start[i];
+      const i_t row_end   = Arow.col_start[i + 1];
+      for (i_t q = row_start; q < row_end; q++) {
+        const i_t k = Arow.i[q];
+        if (k != j) {
+          const f_t aik = Arow.x[q];
+          x_workspace_[k] -= cj * aik;
+          if (!x_mark_[k]) {
+            x_mark_[k] = 1;
+            cut_indices.push_back(k);
+            cut_nz++;
+          }
+        }
+      }
+
+    } else {
+      x_workspace_[j] += cj;
+      if (!x_mark_[j]) {
+        x_mark_[j] = 1;
+        cut_indices.push_back(j);
+        cut_nz++;
+      }
+    }
+  }
+
+  if (found_slack) {
+    // Rebuild the cut from the accumulated workspace (original variables only)
+    cut.i.clear();
+    cut.x.clear();
+    cut.i.reserve(cut_nz);
+    cut.x.reserve(cut_nz);
+    for (i_t k = 0; k < cut_nz; k++) {
+      const i_t j = cut_indices[k];
+      cut.i.push_back(j);
+      cut.x.push_back(x_workspace_[j]);
+    }
+    // Sort the cut
+    cut.sort();
+  }
+
+  // Always clear the workspace: it is written above even when no slack is found,
+  // and the accumulation in this function assumes it starts out zeroed
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+}
+
 template <typename i_t, typename f_t>
 i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
              const csr_matrix_t<i_t, f_t>& cuts,
              const std::vector<f_t>& cut_rhs,
              lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
              lp_solution_t<i_t, f_t>& solution,
              basis_update_mpf_t<i_t, f_t>& basis_update,
              std::vector<i_t>& basic_list,
@@ -764,7 +847,11 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
   csr_matrix_t<i_t, f_t> new_A_row(lp.num_rows, lp.num_cols, 1);
   lp.A.to_compressed_row(new_A_row);
 
-  new_A_row.append_rows(cuts);
+  i_t append_status = new_A_row.append_rows(cuts);
+  if (append_status != 0) {
+    settings.log.printf("append_rows error: %d\n", append_status);
+    exit(1);
+  }
 
   csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
   new_A_row.to_compressed_col(new_A_col);
@@ -786,6 +873,7 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
     lp.lower[j]     = 0.0;
     lp.upper[j]     = inf;
     lp.objective[j] = 0.0;
+    new_slacks.push_back(j);
   }
   settings.log.debug("Done adding slacks\n");
   new_A_col.col_start[lp.num_cols + p] = nz;
@@ -895,6 +983,7 @@ template <typename i_t, typename f_t>
 void remove_cuts(lp_problem_t<i_t, f_t>& lp,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
                  csc_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
                  i_t original_rows,
                  std::vector<variable_type_t>& var_types,
                  std::vector<variable_status_t>& vstatus,
@@ -910,6 +999,12 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
   std::vector<i_t> slacks_to_remove;
   slacks_to_remove.reserve(lp.num_rows - original_rows);
   const f_t dual_tol = 1e-10;
+
+  std::vector<i_t> is_slack(lp.num_cols, 0);
+  for (i_t j : new_slacks) {
+    is_slack[j] = 1;
+  }
+
   for (i_t k = original_rows; k < lp.num_rows; k++) {
     if (std::abs(y[k]) < dual_tol) {
       const i_t row_start = Arow.col_start[k];
@@ -917,11 +1012,9 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
       i_t last_slack      = -1;
       const f_t slack_tol = 1e-3;
       for (i_t p = row_start; p < row_end; p++) {
-        const i_t jj      = Arow.i[p];
-        const i_t col_len = lp.A.col_start[jj + 1] - lp.A.col_start[jj];
-        if (col_len == 1 && var_types[jj] == variable_type_t::CONTINUOUS && Arow.x[p] == 1.0 &&
-            lp.lower[jj] == 0.0) {
-          if (vstatus[jj] == variable_status_t::BASIC && x[jj] > slack_tol) { last_slack = jj; }
+        const i_t j      = Arow.i[p];
+        if (is_slack[j]) {
+          if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; }
         }
       }
       if (last_slack != -1) {
@@ -967,6 +1060,7 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
     new_nonbasic_list.reserve(nonbasic_list.size());
     std::vector<f_t> new_solution_x(lp.num_cols - slacks_to_remove.size());
     std::vector<f_t> new_solution_z(lp.num_cols - slacks_to_remove.size());
+    std::vector<i_t> new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0);
     h = 0;
     for (i_t k = 0; k < lp.num_cols; k++) {
       if (!marked_cols[k]) {
@@ -977,6 +1071,7 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
         new_vstatus[h]    = vstatus[k];
         new_solution_x[h] = x[k];
         new_solution_z[h] = z[k];
+        new_is_slacks[h] = is_slack[k];
         if (new_vstatus[h] != variable_status_t::BASIC) {
           new_nonbasic_list.push_back(h);
         } else {
@@ -994,6 +1089,14 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
     var_types     = new_var_types;
     lp.num_cols   = lp.A.n;
     lp.num_rows   = lp.A.m;
+
+    // Rebuild new_slacks using the renumbered (post-removal) column indices.
+    // reserve (not resize) so that the list holds only the pushed slack indices.
+    new_slacks.clear();
+    new_slacks.reserve(lp.num_cols);
+    for (i_t j = 0; j < lp.num_cols; j++) {
+      if (new_is_slacks[j]) { new_slacks.push_back(j); }
+    }
     basic_list    = new_basic_list;
     nonbasic_list = new_nonbasic_list;
     vstatus       = new_vstatus;
@@ -1023,6 +1126,7 @@ int add_cuts(const simplex_solver_settings_t<int, double>& settings,
               const csr_matrix_t<int, double>& cuts,
               const std::vector<double>& cut_rhs,
               lp_problem_t<int, double>& lp,
+              std::vector<int>& new_slacks,
               lp_solution_t<int, double>& solution,
               basis_update_mpf_t<int, double>& basis_update,
               std::vector<int>& basic_list,
@@ -1034,6 +1138,7 @@ template
 void remove_cuts<int, double>(lp_problem_t<int, double>& lp,
                  const simplex_solver_settings_t<int, double>& settings,
                  csc_matrix_t<int, double>& Arow,
+                 std::vector<int>& new_slacks,
                  int original_rows,
                  std::vector<variable_type_t>& var_types,
                  std::vector<variable_status_t>& vstatus,
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
index 14b6d0e1f..9113b926e 100644
--- a/cpp/src/dual_simplex/cuts.hpp
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -54,7 +54,7 @@ class cut_pool_t {
   // Add a cut in the form: cut'*x >= rhs.
   // We expect that the cut is violated by the current relaxation
   // cut'*xstart < rhs
-  void add_cut(i_t n, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
+  void add_cut(const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
 
   void score_cuts(std::vector<f_t>& x_relax);
 
@@ -96,6 +96,7 @@ class cut_generation_t {
   void generate_cuts(const lp_problem_t<i_t, f_t>& lp,
                      const simplex_solver_settings_t<i_t, f_t>& settings,
                      csc_matrix_t<i_t, f_t>& Arow,
+                     const std::vector<i_t>& new_slacks,
                      const std::vector<variable_type_t>& var_types,
                      basis_update_mpf_t<i_t, f_t>& basis_update,
                      const std::vector<f_t>& xstar,
@@ -106,6 +107,7 @@ class cut_generation_t {
   void generate_gomory_cuts(const lp_problem_t<i_t, f_t>& lp,
                             const simplex_solver_settings_t<i_t, f_t>& settings,
                             csc_matrix_t<i_t, f_t>& Arow,
+                            const std::vector<i_t>& new_slacks,
                             const std::vector<variable_type_t>& var_types,
                             basis_update_mpf_t<i_t, f_t>& basis_update,
                             const std::vector<f_t>& xstar,
@@ -115,6 +117,7 @@ class cut_generation_t {
   void generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
                          const simplex_solver_settings_t<i_t, f_t>& settings,
                          csc_matrix_t<i_t, f_t>& Arow,
+                         const std::vector<i_t>& new_slacks,
                          const std::vector<variable_type_t>& var_types,
                          const std::vector<f_t>& xstar);
   cut_pool_t<i_t, f_t>& cut_pool_;
@@ -171,7 +174,9 @@ class mixed_integer_rounding_cut_t {
   {
   }
 
-  void initialize(const lp_problem_t<i_t, f_t>& lp, const std::vector<f_t>& xstar);
+  void initialize(const lp_problem_t<i_t, f_t>& lp,
+                  const std::vector<i_t>& new_slacks,
+                  const std::vector<f_t>& xstar);
 
   i_t generate_cut(const sparse_vector_t<i_t, f_t>& a,
                    f_t beta,
@@ -181,6 +186,11 @@ class mixed_integer_rounding_cut_t {
                    sparse_vector_t<i_t, f_t>& cut,
                    f_t& cut_rhs);
 
+  void substitute_slacks(const lp_problem_t<i_t, f_t>& lp,
+                         csc_matrix_t<i_t, f_t>& Arow,
+                         sparse_vector_t<i_t, f_t>& cut,
+                         f_t& cut_rhs);
+
  private:
   i_t num_vars_;
   const simplex_solver_settings_t<i_t, f_t>& settings_;
@@ -188,6 +198,8 @@ class mixed_integer_rounding_cut_t {
   std::vector<i_t> x_mark_;
   std::vector<i_t> has_lower_;
   std::vector<i_t> has_upper_;
+  std::vector<i_t> is_slack_;
+  std::vector<i_t> slack_rows_;
   bool needs_complement_;
 };
 
@@ -196,6 +208,7 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
              const csr_matrix_t<i_t, f_t>& cuts,
              const std::vector<f_t>& cut_rhs,
              lp_problem_t<i_t, f_t>& lp,
+             std::vector<i_t>& new_slacks,
              lp_solution_t<i_t, f_t>& solution,
              basis_update_mpf_t<i_t, f_t>& basis_update,
              std::vector<i_t>& basic_list,
@@ -207,6 +220,7 @@ template <typename i_t, typename f_t>
 void remove_cuts(lp_problem_t<i_t, f_t>& lp,
                  const simplex_solver_settings_t<i_t, f_t>& settings,
                  csc_matrix_t<i_t, f_t>& Arow,
+                 std::vector<i_t>& new_slacks,
                  i_t original_rows,
                  std::vector<variable_type_t>& var_types,
                  std::vector<variable_status_t>& vstatus,
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 8398065a7..6160cf1b4 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -365,6 +365,7 @@ i_t csr_matrix_t<i_t, f_t>::append_rows(const csr_matrix_t<i_t, f_t>& C)
   const i_t old_nz = this->row_start[old_m];
   const i_t C_row  = C.m;
   if (C.n > n) {
+    printf("append_rows error: C.n %d n %d\n", C.n, n);
     return -1;
   }
   const i_t C_nz   = C.row_start[C_row];
@@ -566,6 +567,10 @@ i_t csc_matrix_t<i_t, f_t>::check_matrix() const
 {
   std::vector<i_t> row_marker(this->m, -1);
   for (i_t j = 0; j < this->n; ++j) {
+    if (static_cast<size_t>(j) + 1 >= this->col_start.size()) {  // col_start[j + 1] is read below
+      printf("Col start too small size %zu n %d\n", this->col_start.size(), this->n);
+      return -1;
+    }
     const i_t col_start = this->col_start[j];
     const i_t col_end   = this->col_start[j + 1];
     if (col_start > col_end || col_start > this->col_start[this->n]) {
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index 46bae286c..3ba981539 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -121,6 +121,17 @@ void sparse_vector_t<i_t, f_t>::inverse_permute_vector(const std::vector<i_t>& p
   y.i = i_perm;
 }
 
+template <typename i_t, typename f_t>
+f_t sparse_vector_t<i_t, f_t>::dot(const std::vector<f_t>& x_dense) const
+{
+  // Sum x[p] * x_dense[i[p]] over the stored entries, in storage order.
+  // x_dense is indexed by i[p] without a bounds check.
+  f_t sum         = 0.0;
+  const i_t count = i.size();
+  for (i_t p = 0; p < count; ++p) { sum += x[p] * x_dense[i[p]]; }
+  return sum;
+}
+
 template <typename i_t, typename f_t>
 f_t sparse_vector_t<i_t, f_t>::sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const
 {
diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp
index afa559926..3badeed12 100644
--- a/cpp/src/dual_simplex/sparse_vector.hpp
+++ b/cpp/src/dual_simplex/sparse_vector.hpp
@@ -40,6 +40,8 @@ class sparse_vector_t {
   void inverse_permute_vector(const std::vector<i_t>& p);
   // inverse permute a sparse vector into another sparse vector
   void inverse_permute_vector(const std::vector<i_t>& p, sparse_vector_t<i_t, f_t>& y) const;
+  // compute the dot product of a sparse vector with a dense vector
+  f_t dot(const std::vector<f_t>& x) const;
   // compute the dot product of a sparse vector with a column of a CSC matrix
   f_t sparse_dot(const csc_matrix_t<i_t, f_t>& Y, i_t y_col) const;
   // ensure the coefficients in the sparse vectory are sorted in terms of increasing index

From b48e05b2dc2fcab55df692d622af7591992199e6 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 5 Jan 2026 09:00:39 -0800
Subject: [PATCH 17/27] Knapsack cuts from before the winter break

---
 cpp/src/dual_simplex/branch_and_bound.cpp |  24 +-
 cpp/src/dual_simplex/cuts.cpp             | 496 ++++++++++++++++++++--
 cpp/src/dual_simplex/cuts.hpp             |  82 +++-
 cpp/src/dual_simplex/dense_matrix.hpp     |   2 +
 4 files changed, 560 insertions(+), 44 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 6f6917fec..9d4af809a 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1149,7 +1149,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   }
 
   cut_pool_t<i_t, f_t> cut_pool(original_lp_.num_cols, settings_);
-  cut_generation_t<i_t, f_t> cut_generation(cut_pool);
+  cut_generation_t<i_t, f_t> cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_);
 
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
@@ -1191,23 +1191,37 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       // Generate cuts and add them to the cut pool
       cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
+      settings_.log.printf("Generated cuts\n");
 
       // Score the cuts
       cut_pool.score_cuts(root_relax_soln_.x);
+      settings_.log.printf("Scored cuts\n");
       // Get the best cuts from the cut pool
       csr_matrix_t<i_t, f_t> cuts_to_add(0, original_lp_.num_cols, 0);
       std::vector<f_t> cut_rhs;
-      i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs);
+      std::vector<cut_type_t> cut_types;
+      i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
+      settings_.log.printf("Got best cuts\n");
+      print_cut_types(cut_types, settings_);
 
       cuts_to_add.check_matrix();
 
-#ifdef PRINT_MIN_CUT_VIOLATION
+#ifdef PRINT_CUTS
+      csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
+      cuts_to_add.to_compressed_col(cuts_to_add_col);
+      cuts_to_add_col.print_matrix();
+      for (i_t i = 0; i < cut_rhs.size(); i++) {
+        printf("cut_rhs[%d] = %g\n", i, cut_rhs[i]);
+      }
+#endif
+
+#if 1
       f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x);
       settings_.log.printf("Min cut violation %e\n", min_cut_violation);
 #endif
 
       // Resolve the LP with the new cuts
-      settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+      settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
                            num_cuts,
                            cuts_to_add.row_start[cuts_to_add.m],
                            cut_pool.pool_size(),
@@ -1242,6 +1256,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       original_lp_.A.check_matrix();
 #endif
       original_lp_.A.transpose(Arow);
+#if 1
       bool feasible =
         bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed);
 
@@ -1249,6 +1264,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         settings_.log.printf("Bound strengthening failed\n");
         exit(1);
       }
+#endif
 
       // Adjust the solution
       root_relax_soln_.x.resize(original_lp_.num_cols, 0.0);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 8fbf1a275..606c46f24 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -6,13 +6,14 @@
 /* clang-format on */
 
 #include <dual_simplex/cuts.hpp>
+#include <dual_simplex/dense_matrix.hpp>
 
 
 namespace cuopt::linear_programming::dual_simplex {
 
 
 template <typename i_t, typename f_t>
-void cut_pool_t<i_t, f_t>::add_cut(const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
+void cut_pool_t<i_t, f_t>::add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs)
 {
   // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool
 
@@ -26,7 +27,9 @@ void cut_pool_t<i_t, f_t>::add_cut(const sparse_vector_t<i_t, f_t>& cut, f_t rhs
   }
 
   cut_storage_.append_row(cut);
+  settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1);
   rhs_storage_.push_back(rhs);
+  cut_type_.push_back(cut_type);
   cut_age_.push_back(0);
 }
 
@@ -108,6 +111,8 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
   const f_t min_orthogonality = 0.5;
   const f_t min_cut_distance = 1e-4;
   best_cuts_.reserve(std::min(max_cuts, cut_storage_.m));
+  best_cuts_.clear();
+  scored_cuts_ = 0;
 
   while (scored_cuts_ < max_cuts && !sorted_indices.empty()) {
     const i_t i = sorted_indices[0];
@@ -119,7 +124,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     if (cut_age_[i] > 0) {
         settings_.log.printf("Adding cut with age %d\n", cut_age_[i]);
     }
-    //settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]);
+    settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]);
 
     best_cuts_.push_back(i);
     scored_cuts_++;
@@ -146,7 +151,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
 }
 
 template <typename i_t, typename f_t>
-i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs)
+i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs, std::vector<cut_type_t>& best_cut_types)
 {
   best_cuts.m = 0;
   best_cuts.n = original_vars_;
@@ -162,6 +167,7 @@ i_t cut_pool_t<i_t, f_t>::get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::
     best_cuts.append_row(cut);
     //settings_.log.printf("Best cuts nz %d\n", best_cuts.row_start[best_cuts.m]);
     best_rhs.push_back(-rhs_storage_[i]);
+    best_cut_types.push_back(cut_type_[i]);
   }
 
   return static_cast<i_t>(best_cuts_.size());
@@ -182,6 +188,331 @@ void cut_pool_t<i_t, f_t>::drop_cuts()
    // TODO: Implement this
 }
 
+template <typename i_t, typename f_t>
+knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types)
+{
+  knapsack_constraints_.reserve(lp.num_rows);
+  // Constructor: flag the columns listed in new_slacks, then collect into knapsack_constraints_ every row that passes the scan below
+  is_slack_.resize(lp.num_cols, 0);
+  for (i_t j : new_slacks) {
+    is_slack_[j] = 1;
+  }
+  // Arow.col_start[i] .. Arow.col_start[i + 1] delimits the stored entries of constraint row i
+  for (i_t i = 0; i < lp.num_rows; i++) {
+    const i_t row_start = Arow.col_start[i];
+    const i_t row_end   = Arow.col_start[i + 1];
+    bool is_knapsack    = true;
+    f_t sum_pos         = 0.0;
+    // A row qualifies only if every non-slack entry has an integral, nonnegative coefficient on an integer variable with bounds [0, 1]
+    for (i_t p = row_start; p < row_end; p++) {
+      const i_t j = Arow.i[p];
+      if (is_slack_[j]) { continue; }
+      const f_t aj = Arow.x[p];
+      // Integrality of the coefficient is judged with settings.integer_tol
+      if (std::abs(aj - std::round(aj)) > settings.integer_tol) {
+        is_knapsack = false;
+        break;
+      }
+      if (var_types[j] != variable_type_t::INTEGER || lp.lower[j] != 0.0 || lp.upper[j] != 1.0) {
+        is_knapsack = false;
+        break;
+      }
+      if (aj < 0.0) {
+        is_knapsack = false;
+        break;
+      }
+      sum_pos += aj;
+    }
+    // sum_pos now holds the sum of the non-slack coefficients (only complete if the scan did not break early)
+    // Keep the row when its rhs beta is integral (same tolerance) and 0 <= beta <= sum_pos
+    if (is_knapsack) {
+      const f_t beta = lp.rhs[i];
+      printf("Knapsack constraint %d beta %e sum_pos %e\n", i, beta, sum_pos);
+      if (std::abs(beta - std::round(beta)) <= settings.integer_tol) {
+        if (beta >= 0.0 && beta <= sum_pos) {
+          knapsack_constraints_.push_back(i);
+        }
+      }
+    }
+  }
+
+  i_t num_knapsack_constraints = knapsack_constraints_.size();
+  settings.log.printf("Number of knapsack constraints %d\n", num_knapsack_constraints);
+}
+
+template <typename i_t, typename f_t>
+i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar,
+  i_t knapsack_row,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  // Try to separate a cover cut from row knapsack_row at xstar. Returns 0 with (cut, cut_rhs) filled in, -1 otherwise
+  sparse_vector_t<i_t, f_t> knapsack_inequality(Arow, knapsack_row);
+  f_t knapsack_rhs = lp.rhs[knapsack_row];
+  i_t num_items    = 0;  // number of non-slack entries in the row
+  // Remove the slacks from the inequality
+  f_t seperation_rhs = 0.0;
+  printf(" Knapsack : ");
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (is_slack_[j]) {
+      knapsack_inequality.x[k] = 0.0;
+    } else {
+      printf(" %g x%d +", knapsack_inequality.x[k], j);
+      seperation_rhs += knapsack_inequality.x[k];
+      num_items++;
+    }
+  }
+  printf(" <= %g\n", knapsack_rhs);
+  seperation_rhs -= (knapsack_rhs + 1);
+
+  printf("\t");
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (!is_slack_[j]) {
+        if (std::abs(xstar[j]) > 1e-3) {
+          printf("x_relax[%d]= %g ", j, xstar[j]);
+        }
+    }
+  }
+  printf("\n");
+
+  printf("seperation_rhs %g\n", seperation_rhs);
+  if (seperation_rhs <= 0.0) { return -1; }
+
+  // Knapsack over the items kept OUT of the cover: maximize sum (1 - xstar_j) s.t. their weight <= seperation_rhs
+  // The arrays are sized by the counted non-slack entries, so a row with zero or several slacks cannot overrun them
+  std::vector<f_t> values(num_items);
+  std::vector<f_t> weights(num_items);
+  i_t h                  = 0;
+  f_t objective_constant = 0.0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (!is_slack_[j]) {
+      const f_t vj = 1.0 - xstar[j];
+      objective_constant += vj;
+      values[h]  = vj;
+      weights[h] = knapsack_inequality.x[k];
+      h++;
+    }
+  }
+  std::vector<f_t> solution(num_items);
+
+  printf("Calling solve_knapsack_problem\n");
+  f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution);
+  if (objective != objective) { return -1; }  // NaN: the knapsack solve did not produce a solution
+  printf("objective %e objective_constant %e\n", objective, objective_constant);
+
+  f_t seperation_value = -objective + objective_constant;
+  printf("seperation_value %e\n", seperation_value);
+  const f_t tol = 1e-6;
+  if (seperation_value >= 1.0 - tol) { return -1; }
+
+  i_t cover_size = 0;
+  for (i_t k = 0; k < solution.size(); k++) {
+    if (solution[k] == 0.0) { cover_size++; }
+  }
+  if (cover_size == 0) { return -1; }  // an empty cover would produce a cut with no coefficients
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(cover_size);
+  cut.x.reserve(cover_size);
+
+  h = 0;
+  for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
+    const i_t j = knapsack_inequality.i[k];
+    if (!is_slack_[j]) {
+      if (solution[h] == 0.0) {
+        cut.i.push_back(j);
+        cut.x.push_back(-1.0);
+      }
+      h++;
+    }
+  }
+  cut_rhs = -cover_size + 1;
+  cut.sort();
+
+  // The cut is in the form: - sum_{j in cover} x_j >= -cover_size + 1
+  // Which is equivalent to: sum_{j in cover} x_j <= cover_size - 1
+
+  // Verify the cut is violated: cut'*xstar must fall below cut_rhs by more than tol (violation is negative)
+  f_t dot = cut.dot(xstar);
+  f_t violation = dot - cut_rhs;
+  printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation);
+
+  if (violation >= -tol) { return -1; }
+  return 0;
+}
+
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::greedy_knapsack_problem(const std::vector<f_t>& values,
+                                                             const std::vector<f_t>& weights,
+                                                             f_t rhs,
+                                                             std::vector<f_t>& solution)
+{
+  i_t n = weights.size();
+  solution.assign(n, 0.0);
+  // Heuristic 0/1 knapsack: fills solution with 0.0/1.0 flags and returns the sum of values of the chosen items
+  // perm starts as the identity permutation 0..n-1
+  std::vector<i_t> perm(n);
+  std::iota(perm.begin(), perm.end(), 0);
+  // NOTE(review): a zero weight makes the ratio below inf or NaN; NaN would break the ordering std::sort requires -- confirm weights > 0
+  std::vector<f_t> ratios;
+  ratios.resize(n);
+  for (i_t i = 0; i < n; i++) {
+    ratios[i] = values[i] / weights[i];
+  }
+
+  // Order the items by decreasing value / weight ratio
+  std::sort(perm.begin(), perm.end(), [&](i_t i, i_t j) { return ratios[i] > ratios[j]; });
+
+  // Walk the items in that order and take every item that still fits in the remaining capacity
+  f_t remaining   = rhs;
+  f_t total_value = 0.0;
+
+  for (i_t j : perm) {
+    if (weights[j] <= remaining) {
+      solution[j] = 1.0;
+      remaining -= weights[j];
+      total_value += values[j];
+    }
+  }
+
+  // Best single-item fallback: the most valuable item that fits within rhs on its own
+  f_t best_single_value = 0.0;
+  i_t best_single_idx   = -1;
+
+  for (i_t j = 0; j < n; ++j) {
+    if (weights[j] <= rhs && values[j] > best_single_value) {
+      best_single_value = values[j];
+      best_single_idx   = j;
+    }
+  }
+  // If that single item beats the greedy total, select it alone and return its value instead
+  if (best_single_value > total_value) {
+    solution.assign(n, 0.0);
+    solution[best_single_idx] = 1.0;
+    return best_single_value;
+  }
+
+  return total_value;
+}
+
+template <typename i_t, typename f_t>
+f_t knapsack_generation_t<i_t, f_t>::solve_knapsack_problem(const std::vector<f_t>& values,
+                                                            const std::vector<f_t>& weights,
+                                                            f_t rhs,
+                                                            std::vector<f_t>& solution)
+{
+  // Solve the 0/1 knapsack problem
+  // maximize sum_{j=0}^{n-1} values[j] * solution[j]
+  // subject to sum_{j=0}^{n-1} weights[j] * solution[j] <= rhs
+  // values: values of the items
+  // weights: weights of the items (rounded to the nearest integer inside the dynamic program)
+  // return the (possibly scaled-down) value of the solution, or NaN if no scaling factor could be formed
+  // solution: on return holds 1.0 for each selected item and 0.0 otherwise; must have room for n entries
+  // Using approximate dynamic programming
+  i_t n = weights.size();
+  if (n == 0 || values.empty()) { return 0.0; }  // nothing to pack; max_element below needs a non-empty range
+  f_t objective = std::numeric_limits<f_t>::quiet_NaN();
+
+  // Compute the maximum value
+  f_t vmax = *std::max_element(values.begin(), values.end());
+
+  // Check if all the values are integers
+  bool all_integers = true;
+  const f_t integer_tol = 1e-5;
+  for (i_t j = 0; j < n; j++) {
+    if (std::abs(values[j] - std::round(values[j])) > integer_tol) {
+        all_integers = false;
+        break;
+    }
+  }
+
+  printf("all_integers %d\n", all_integers);
+
+  // Compute the scaling factor and the scaled integer values; values below 0 are clamped to 0
+  f_t scale = 1.0;
+  std::vector<i_t> scaled_values(n);
+  if (all_integers) {
+    for (i_t j = 0; j < n; j++) {
+      scaled_values[j] = static_cast<i_t>(std::max<f_t>(0.0, std::round(values[j])));
+    }
+  } else {
+    const f_t epsilon = 0.1;
+    scale             = epsilon * vmax / static_cast<f_t>(n);
+    if (scale <= 0.0) { return std::numeric_limits<f_t>::quiet_NaN(); }
+    printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n);
+    for (i_t i = 0; i < n; ++i) {
+      scaled_values[i] = static_cast<i_t>(std::floor(std::max<f_t>(0.0, values[i]) / scale));
+      // A negative scaled value would make the dp lookup below read column v - scaled_values > sum_value
+    }
+  }
+
+  i_t sum_value = std::accumulate(scaled_values.begin(), scaled_values.end(), 0);
+  const i_t INT_INF = std::numeric_limits<i_t>::max() / 2;
+  printf("sum value %d\n", sum_value);
+  const i_t max_size = 10000;
+  if (sum_value <= 0 || sum_value >= max_size) {
+    printf("sum value %d is negative or too large using greedy solution\n", sum_value);
+    return greedy_knapsack_problem(values, weights, rhs, solution);
+  }
+
+  // dp(j, v) = minimum weight using first j items to get value v
+  dense_matrix_t<i_t, i_t> dp(n + 1, sum_value + 1, INT_INF);
+  dense_matrix_t<i_t, uint8_t> take(n + 1, sum_value + 1, 0);
+  dp(0, 0) = 0;
+  printf("start dp\n");
+
+  // Dynamic programming over the items j and the target values v
+  for (i_t j = 1; j <= n; ++j) {
+    for (i_t v = 0; v <= sum_value; ++v) {
+      // Do not take item j-1
+      dp(j, v) = dp(j - 1, v);
+
+      // Take item j-1 if possible (weights are integral up to a tolerance, so round rather than truncate)
+      if (v >= scaled_values[j - 1]) {
+        i_t candidate = dp(j - 1, v - scaled_values[j - 1]) + static_cast<i_t>(std::round(weights[j - 1]));
+        if (candidate < dp(j, v)) {
+          dp(j, v)   = candidate;
+          take(j, v) = 1;
+        }
+      }
+    }
+  }
+
+  // Find the best achievable value within capacity
+  i_t best_value = 0;
+  for (i_t v = 0; v <= sum_value; ++v) {
+    if (dp(n, v) <= rhs) { best_value = v; }
+  }
+
+  // Backtrack through take to recover the solution
+  i_t v = best_value;
+  for (i_t j = n; j >= 1; --j) {
+    if (take(j, v)) {
+      solution[j - 1] = 1.0;
+      v -= scaled_values[j - 1];
+    } else {
+      solution[j - 1] = 0.0;
+    }
+  }
+
+  objective = best_value * scale;
+  return objective;
+}
+
 template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
                                                const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -196,12 +527,41 @@ void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
   // Generate Gomory Cuts
   generate_gomory_cuts(
     lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
+  settings.log.printf("Generated Gomory cuts\n");
 
+  // Generate Knapsack cuts
+  generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
+  settings.log.printf("Generated Knapsack cuts\n");
 
  // Generate MIR cuts
  // generate_mir_cuts(lp, settings, Arow, var_types, xstar);
 }
 
+template <typename i_t, typename f_t>
+void cut_generation_t<i_t, f_t>::generate_knapsack_cuts(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csc_matrix_t<i_t, f_t>& Arow,
+  const std::vector<i_t>& new_slacks,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar)
+{  // Attempts one cut per row in the knapsack list; each success is added to cut_pool_ tagged KNAPSACK
+  if (knapsack_generation_.num_knapsack_constraints() > 0) {
+    for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) {
+      sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+      f_t cut_rhs;  // left uninitialized here; only read when the status below is 0
+      i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts(
+        lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut, cut_rhs);
+      if (knapsack_status == 0) {
+        settings.log.printf("Adding Knapsack cut %d\n", knapsack_row);
+        cut_pool_.add_cut(cut_type_t::KNAPSACK, cut, cut_rhs);
+      } else {  // every non-zero status lands here, not only the "not violated" case named in the message
+        settings.log.printf("Knapsack cut %d is not violated. Skipping\n", knapsack_row);
+      }
+    }
+  }
+}
+
 template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
                                                    const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -260,7 +620,7 @@ void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>&
         }
 
         settings.log.printf("Adding MIR cut %d\n", i);
-        cut_pool_.add_cut(cut, cut_rhs);
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
     }
   }
 }
@@ -310,16 +670,25 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       bool A_valid = false;
       f_t cut_A_distance = 0.0;
       if (mir_status == 0) {
-        mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs);
-        // Check that the cut is violated
-        f_t dot = cut_A.dot(xstar);
-        f_t cut_norm = cut_A.norm2_squared();
-        if (dot >= cut_A_rhs) {
-          settings.log.printf("Cut %d is not violated. Skipping\n", i);
+        if (cut_A.i.size() == 0) {
+          settings.log.printf("No coefficients in cut A\n");
           continue;
         }
-        cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
-        A_valid = true;
+        mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs);
+        if (cut_A.i.size() == 0) {
+          settings.log.printf("No coefficients in cut A after substituting slacks\n");
+          A_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_A.dot(xstar);
+          f_t cut_norm = cut_A.norm2_squared();
+          if (dot >= cut_A_rhs) {
+            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            continue;
+          }
+          cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
+          A_valid        = true;
+        }
         //cut_pool_.add_cut(lp.num_cols, cut, cut_rhs);
       }
 
@@ -335,23 +704,34 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       bool B_valid = false;
       f_t cut_B_distance = 0.0;
       if (mir_status == 0) {
-        mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs);
-        // Check that the cut is violated
-        f_t dot = cut_B.dot(xstar);
-        f_t cut_norm = cut_B.norm2_squared();
-        if (dot >= cut_B_rhs) {
-          settings.log.printf("Cut %d is not violated. Skipping\n", i);
+        if (cut_B.i.size() == 0) {
+          settings.log.printf("No coefficients in cut B\n");
           continue;
         }
-        cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);
-        B_valid = true;
+        mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs);
+        if (cut_B.i.size() == 0) {
+          settings.log.printf("No coefficients in cut B after substituting slacks\n");
+          B_valid = false;
+        } else {
+          // Check that the cut is violated
+          f_t dot      = cut_B.dot(xstar);
+          f_t cut_norm = cut_B.norm2_squared();
+          if (dot >= cut_B_rhs) {
+            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            continue;
+          }
+          cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);
+          B_valid        = true;
+        }
         // cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs);
       }
 
       if ((cut_A_distance > cut_B_distance) && A_valid) {
-        cut_pool_.add_cut(cut_A, cut_A_rhs);
+        printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid);
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs);
       } else if (B_valid) {
-        cut_pool_.add_cut(cut_B, cut_B_rhs);
+        printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid);
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs);
       }
     }
   }
@@ -720,6 +1100,12 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   cut.sort();
 
   cut_rhs = R;
+
+  if (cut.i.size() == 0) {
+    settings_.log.printf("No coefficients in cut\n");
+    return -1;
+  }
+
   return 0;
 }
 
@@ -735,38 +1121,62 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
   i_t cut_nz = 0;
   std::vector<i_t> cut_indices;
   cut_indices.reserve(cut.i.size());
+
+#if 1
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+
+
   for (i_t k = 0; k < cut.i.size(); k++) {
     const i_t j  = cut.i[k];
     const f_t cj = cut.x[k];
     if (is_slack_[j]) {
       found_slack = true;
+
       // Do the substitution
       // Slack variable s_j participates in row i of the constraint matrix
       // Row i is of the form:
-      // sum_{k != j} A(i, k) * x_k + A(i, j) * s_j = rhs_i
+      // sum_{k != j} A(i, k) * x_k + s_j = rhs_i
       /// So we have that
       // s_j = rhs_i - sum_{k != j} A(i, k) * x_k
 
       // Our cut is of the form:
       // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs
       // So the cut becomes
-      // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{k != j} A(i, k) * x_k) >= cut_rhs
+      // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs
       // This is equivalent to:
-      // sum_{k != j} C(k) * x_k + sum_{k != j} -C(k) * A(i, k) * x_k >= cut_rhs - C(j) * rhs_i
+      // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j) * A(i, h) * x_h >= cut_rhs - C(j) * rhs_i
       const i_t i         = slack_rows_[j];
+      //printf("Found slack %d in cut. lo %e up %e. Slack row %d\n", j, lp.lower[j], lp.upper[j], i);
       cut_rhs -= cj * lp.rhs[i];
       const i_t row_start = Arow.col_start[i];
       const i_t row_end   = Arow.col_start[i + 1];
       for (i_t q = row_start; q < row_end; q++) {
-        const i_t k = Arow.i[q];
-        if (k != j) {
-          const f_t aik = Arow.x[q];
-          x_workspace_[k] -= cj * aik;
-          if (!x_mark_[k]) {
-            x_mark_[k] = 1;
-            cut_indices.push_back(k);
+        const i_t h = Arow.i[q];
+        if (h != j) {
+          const f_t aih = Arow.x[q];
+          x_workspace_[h] -= cj * aih;
+          if (!x_mark_[h]) {
+            x_mark_[h] = 1;
+            cut_indices.push_back(h);
             cut_nz++;
           }
+        } else {
+            const f_t aij = Arow.x[q];
+            if (aij != 1.0) {
+                printf("Slack row %d has non-unit coefficient for variable %d\n", i, j);
+                exit(1);
+            }
         }
       }
 
@@ -794,13 +1204,27 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
     }
     // Sort the cut
     cut.sort();
+  }
 
-    // Clear the workspace
-    for (i_t jj : cut_indices) {
-      x_workspace_[jj] = 0.0;
-      x_mark_[jj]      = 0;
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+
+#if 1
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
     }
   }
+#endif
 }
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
index 9113b926e..ec877c274 100644
--- a/cpp/src/dual_simplex/cuts.hpp
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -18,6 +18,30 @@
 
 namespace cuopt::linear_programming::dual_simplex {
 
+enum cut_type_t : int8_t {  // tag passed to cut_pool_t::add_cut to record which generator produced a cut
+   MIXED_INTEGER_GOMORY = 0,
+   MIXED_INTEGER_ROUNDING  = 1,
+   KNAPSACK = 2,
+};
+
+template <typename i_t, typename f_t>
+void print_cut_types(const std::vector<cut_type_t>& cut_types, const simplex_solver_settings_t<i_t, f_t>& settings) {
+  i_t num_gomory_cuts = 0;  // one tally per cut_type_t value; all three are written to settings.log in a single line
+  i_t num_mir_cuts = 0;
+  i_t num_knapsack_cuts = 0;
+  for (i_t i = 0; i < cut_types.size(); i++) {
+    if (cut_types[i] == cut_type_t::MIXED_INTEGER_GOMORY) {
+      num_gomory_cuts++;
+    } else if (cut_types[i] == cut_type_t::MIXED_INTEGER_ROUNDING) {
+      num_mir_cuts++;
+    } else if (cut_types[i] == cut_type_t::KNAPSACK) {
+      num_knapsack_cuts++;
+    }
+  }
+  settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d\n", num_gomory_cuts, num_mir_cuts, num_knapsack_cuts);
+}
+
+
 template <typename i_t, typename f_t>
 f_t minimum_violation(const csr_matrix_t<i_t, f_t>& C,
                       const std::vector<f_t>& cut_rhs,
@@ -32,6 +56,7 @@ f_t minimum_violation(const csr_matrix_t<i_t, f_t>& C,
   for (i_t k = 0; k < Cx.size(); k++) {
     if (Cx[k] <= cut_rhs[k]) {
       printf("C*x <= d for cut %d. C*x %e rhs %e\n", k, Cx[k], cut_rhs[k]);
+      exit(1);
     }
     min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]);
   }
@@ -47,6 +72,7 @@ class cut_pool_t {
       cut_storage_(0, original_vars, 0),
       rhs_storage_(0),
       cut_age_(0),
+      cut_type_(0),
       scored_cuts_(0)
   {
   }
@@ -54,12 +80,12 @@ class cut_pool_t {
   // Add a cut in the form: cut'*x >= rhs.
   // We expect that the cut is violated by the current relaxation
   // cut'*xstart < rhs
-  void add_cut(const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
+  void add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
 
   void score_cuts(std::vector<f_t>& x_relax);
 
   // We return the cuts in the form best_cuts*x <= best_rhs
-  i_t get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs);
+  i_t get_best_cuts(csr_matrix_t<i_t, f_t>& best_cuts, std::vector<f_t>& best_rhs, std::vector<cut_type_t>& best_cut_types);
 
   void age_cuts();
 
@@ -78,6 +104,7 @@ class cut_pool_t {
   csr_matrix_t<i_t, f_t> cut_storage_;
   std::vector<f_t> rhs_storage_;
   std::vector<i_t> cut_age_;
+  std::vector<cut_type_t> cut_type_;
 
   i_t scored_cuts_;
   std::vector<f_t> cut_distances_;
@@ -88,10 +115,49 @@ class cut_pool_t {
 };
 
 template <typename i_t, typename f_t>
-class cut_generation_t {
+class knapsack_generation_t {
  public:
-  cut_generation_t(cut_pool_t<i_t, f_t>& cut_pool) : cut_pool_(cut_pool) {}
+  knapsack_generation_t(const lp_problem_t<i_t, f_t>& lp,
+                        const simplex_solver_settings_t<i_t, f_t>& settings,
+                        csc_matrix_t<i_t, f_t>& Arow,
+                        const std::vector<i_t>& new_slacks,
+                        const std::vector<variable_type_t>& var_types);
+
+  i_t generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                             const simplex_solver_settings_t<i_t, f_t>& settings,
+                             csc_matrix_t<i_t, f_t>& Arow,
+                             const std::vector<i_t>& new_slacks,
+                             const std::vector<variable_type_t>& var_types,
+                             const std::vector<f_t>& xstar,
+                             i_t knapsack_row,
+                             sparse_vector_t<i_t, f_t>& cut,
+                             f_t& cut_rhs);
+
+  i_t num_knapsack_constraints() const { return knapsack_constraints_.size(); }
+  const std::vector<i_t>& get_knapsack_constraints() const { return knapsack_constraints_; }
+
+ private:
 
+  f_t greedy_knapsack_problem(const std::vector<f_t>& values, const std::vector<f_t>& weights, f_t rhs, std::vector<f_t>& solution);
+  f_t solve_knapsack_problem(const std::vector<f_t>& values, const std::vector<f_t>& weights, f_t rhs, std::vector<f_t>& solution);
+
+
+  std::vector<i_t> is_slack_;
+  std::vector<i_t> knapsack_constraints_;
+};
+
+template <typename i_t, typename f_t>
+class cut_generation_t {
+ public:
+  cut_generation_t(cut_pool_t<i_t, f_t>& cut_pool,
+                   const lp_problem_t<i_t, f_t>& lp,
+                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                   csc_matrix_t<i_t, f_t>& Arow,
+                   const std::vector<i_t>& new_slacks,
+                   const std::vector<variable_type_t>& var_types)
+    : cut_pool_(cut_pool), knapsack_generation_(lp, settings, Arow, new_slacks, var_types)
+  {
+  }
 
   void generate_cuts(const lp_problem_t<i_t, f_t>& lp,
                      const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -120,7 +186,15 @@ class cut_generation_t {
                          const std::vector<i_t>& new_slacks,
                          const std::vector<variable_type_t>& var_types,
                          const std::vector<f_t>& xstar);
+
+  void generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
+                              const simplex_solver_settings_t<i_t, f_t>& settings,
+                              csc_matrix_t<i_t, f_t>& Arow,
+                              const std::vector<i_t>& new_slacks,
+                              const std::vector<variable_type_t>& var_types,
+                              const std::vector<f_t>& xstar);
   cut_pool_t<i_t, f_t>& cut_pool_;
+  knapsack_generation_t<i_t, f_t> knapsack_generation_;
 };
 
 template <typename i_t, typename f_t>
diff --git a/cpp/src/dual_simplex/dense_matrix.hpp b/cpp/src/dual_simplex/dense_matrix.hpp
index b1fc521b3..3f5287113 100644
--- a/cpp/src/dual_simplex/dense_matrix.hpp
+++ b/cpp/src/dual_simplex/dense_matrix.hpp
@@ -18,6 +18,8 @@ class dense_matrix_t {
  public:
   dense_matrix_t(i_t rows, i_t cols) : m(rows), n(cols), values(rows * cols, 0.0) {}
 
+  dense_matrix_t(i_t rows, i_t cols, f_t value) : m(rows), n(cols), values(rows * cols, value) {}  // like the two-argument constructor, but initializes values with value instead of 0.0
+
   void resize(i_t rows, i_t cols)
   {
     m = rows;

From 78cb1dcb4a2ce8ddb056d9657b4be751716e68cc Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Mon, 5 Jan 2026 17:16:40 -0800
Subject: [PATCH 18/27] Turn off sub-mip. Fix edge norms which was leading to
 crazy depth on b-ball and swath1. Add reliability branching as an option.
 Finally seeing good performance on swath1

---
 cpp/src/dual_simplex/branch_and_bound.cpp     | 203 +++++++++++++++++-
 cpp/src/dual_simplex/branch_and_bound.hpp     |   1 +
 cpp/src/dual_simplex/cuts.cpp                 |  45 +++-
 cpp/src/dual_simplex/phase2.cpp               |  51 ++++-
 cpp/src/dual_simplex/presolve.cpp             |   2 +-
 cpp/src/dual_simplex/pseudo_costs.cpp         | 134 ++++++++++++
 cpp/src/dual_simplex/pseudo_costs.hpp         |  10 +
 cpp/src/dual_simplex/sparse_vector.cpp        |  24 +++
 cpp/src/dual_simplex/sparse_vector.hpp        |   2 +
 cpp/src/mip/diversity/recombiners/sub_mip.cuh |   2 +-
 10 files changed, 450 insertions(+), 24 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 9d4af809a..65403404b 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -216,6 +216,14 @@ branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
   convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info);
   full_variable_types(original_problem_, original_lp_, var_types_);
 
+  num_integer_variables_ = 0;
+  for (i_t j = 0; j < original_lp_.num_cols; j++) {
+    if (var_types_[j] == variable_type_t::INTEGER) {
+      num_integer_variables_++;
+    }
+  }
+  printf("num_integer_variables %d\n", num_integer_variables_);
+
   mutex_upper_.lock();
   upper_bound_ = inf;
   mutex_upper_.unlock();
@@ -475,6 +483,31 @@ mip_status_t branch_and_bound_t<i_t, f_t>::set_final_solution(mip_solution_t<i_t
 
   if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) {
     mip_status = mip_status_t::OPTIMAL;
+#if 1
+    FILE* fid = NULL;
+    fid       = fopen("solution.dat", "w");
+    if (fid != NULL) {
+      printf("Writing solution.dat\n");
+
+      std::vector<f_t> residual = original_lp_.rhs;
+      matrix_vector_multiply(original_lp_.A, 1.0, incumbent_.x, -1.0, residual);
+      printf("|| A*x - b ||_inf %e\n", vector_norm_inf<i_t, f_t>(residual));
+      auto hash_combine_f = [](size_t seed, f_t x) {
+        seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        return seed;
+      };
+      printf("incumbent size %ld original lp cols %d\n", incumbent_.x.size(), original_lp_.num_cols);
+      i_t n = original_lp_.num_cols;
+      size_t seed = n;
+      fprintf(fid, "%d\n", n);
+      for (i_t j = 0; j < n; ++j) {
+        fprintf(fid, "%.17g\n", incumbent_.x[j]);
+        seed = hash_combine_f(seed, incumbent_.x[j]);
+      }
+      printf("Solution hash: %20x\n", seed);
+      fclose(fid);
+    }
+#endif
     if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) {
       settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n",
                            settings_.absolute_mip_gap_tol);
@@ -580,6 +613,16 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
 {
   f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10;
 
+  if (node_ptr->depth >= num_integer_variables_) {
+    printf("Depth %d >= num_integer_variables %d\n", node_ptr->depth, num_integer_variables_);
+    mip_node_t<i_t, f_t>* parent = node_ptr->parent;
+    while (parent != nullptr) {
+      printf("Parent depth %d\n", parent->depth);
+      printf("Parent branch var %d dir %d lower %e upper %e\n", parent->branch_var, parent->branch_dir, parent->branch_var_lower, parent->branch_var_upper);
+      parent = parent->parent;
+    }
+  }
+
   lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
   std::vector<variable_status_t>& leaf_vstatus = node_ptr->vstatus;
   assert(leaf_vstatus.size() == leaf_problem.num_cols);
@@ -602,12 +645,11 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
     bound_strengthening(row_sense, lp_settings, leaf_problem, Arow, var_types_, bounds_changed);
 
   dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED;
+  std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
 
   if (feasible) {
     i_t node_iter                    = 0;
     f_t lp_start_time                = tic();
-    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
-
     lp_status = dual_phase2(2,
                             0,
                             lp_start_time,
@@ -650,6 +692,23 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
     i_t leaf_num_fractional =
       fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional);
 
+    // Check if any of the fractional variables were fixed to their bounds
+    for (i_t j : leaf_fractional)
+    {
+      if (leaf_problem.lower[j] == leaf_problem.upper[j])
+      {
+        printf(
+          "Node %d: Fixed variable %d has a fractional value %e. Lower %e upper %e. Variable status %d\n",
+          node_ptr->node_id,
+          j,
+          leaf_solution.x[j],
+          leaf_problem.lower[j],
+          leaf_problem.upper[j],
+          leaf_vstatus[j]);
+      }
+    }
+
+
     f_t leaf_objective    = compute_objective(leaf_problem, leaf_solution.x);
     node_ptr->lower_bound = leaf_objective;
     search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective);
@@ -670,8 +729,14 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
 
     } else if (leaf_objective <= upper_bound + abs_fathom_tol) {
       // Choose fractional variable to branch on
+
+#ifdef RELIABLE_BRANCHING
+      const i_t branch_var =
+        pc_.reliable_variable_selection(leaf_problem, lp_settings, var_types_, leaf_vstatus, leaf_edge_norms, leaf_fractional, leaf_solution.x, leaf_objective, lp_settings.log);
+#else
       const i_t branch_var =
         pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log);
+#endif
 
       assert(leaf_vstatus.size() == leaf_problem.num_cols);
       search_tree.branch(
@@ -1057,6 +1122,8 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   stats_.nodes_unexplored = 0;
   stats_.nodes_explored   = 0;
 
+  printf("Branch and bound solve called\n");
+
   if (guess_.size() != 0) {
     std::vector<f_t> crushed_guess;
     crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess);
@@ -1151,6 +1218,96 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   cut_pool_t<i_t, f_t> cut_pool(original_lp_.num_cols, settings_);
   cut_generation_t<i_t, f_t> cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_);
 
+  std::vector<f_t> saved_solution;
+#if 1
+  printf("Trying to open solution.dat\n");
+  FILE* fid = NULL;
+  fid = fopen("solution.dat", "r");
+  if (fid != NULL)
+  {
+    i_t n_solution_dat;
+    i_t count = fscanf(fid, "%d\n", &n_solution_dat);
+    printf("Solution.dat variables %d =? %d =? %ld count %d\n", n_solution_dat, original_lp_.num_cols, solution.x.size(), count);
+    bool good = true;
+    if (count == 1 && n_solution_dat == original_lp_.num_cols)
+    {
+      printf("Opened solution.dat with %d number of variables\n", n_solution_dat);
+      saved_solution.resize(n_solution_dat);
+       for (i_t j = 0; j < n_solution_dat; j++)
+       {
+         count = fscanf(fid, "%lf", &saved_solution[j]);
+         if (count != 1)
+         {
+           printf("bad read solution.dat: j %d count %d\n", j, count);
+           good = false;
+           break;
+         }
+       }
+    } else {
+      good = false;
+    }
+    fclose(fid);
+
+    if (!good)
+    {
+      saved_solution.resize(0);
+      printf("Solution.dat is bad\n");
+    }
+    else
+    {
+      printf("Read solution file\n");
+
+      auto hash_combine_f = [](size_t seed, f_t x) {
+        seed ^= std::hash<f_t>{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        return seed;
+      };
+      size_t seed = original_lp_.num_cols;
+      for (i_t j = 0; j < original_lp_.num_cols; ++j)
+      {
+        seed = hash_combine_f(seed, saved_solution[j]);
+      }
+      printf("Saved solution hash: %20x\n", seed);
+
+      FILE* fid = NULL;
+      fid       = fopen("solution.dat.2", "w");
+      if (fid != NULL) {
+        printf("Writing solution.dat.2\n");
+        i_t n = original_lp_.num_cols;
+        size_t seed = n;
+        fprintf(fid, "%d\n", n);
+        for (i_t j = 0; j < n; ++j) {
+          fprintf(fid, "%.17g\n", saved_solution[j]);
+        }
+        fclose(fid);
+      }
+
+      // Compute || A * x - b ||_inf
+      std::vector<f_t> residual = original_lp_.rhs;
+      matrix_vector_multiply(original_lp_.A, 1.0, saved_solution, -1.0, residual);
+      printf("Saved solution: || A*x - b ||_inf %e\n", vector_norm_inf<i_t, f_t>(residual));
+      f_t infeas = 0;
+      for (i_t j = 0; j < original_lp_.num_cols; j++) {
+        if (saved_solution[j] < original_lp_.lower[j] - 1e-6) {
+          f_t curr_infeas = (original_lp_.lower[j] - saved_solution[j]);
+          infeas += curr_infeas;
+          printf(
+            "j: %d saved solution %e lower %e\n", j, saved_solution[j], original_lp_.lower[j]);
+        }
+        if (saved_solution[j] > original_lp_.upper[j] + 1e-6) {
+          f_t curr_infeas = (saved_solution[j] - original_lp_.upper[j]);
+          infeas += curr_infeas;
+          printf(
+            "j %d saved solution %e upper %e\n", j, saved_solution[j], original_lp_.upper[j]);
+        }
+      }
+      printf("Bound infeasibility %e\n", infeas);
+    }
+  } else {
+    printf("Could not open solution.dat\n");
+  }
+#endif
+
+
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
 #ifdef PRINT_SOLUTION
@@ -1191,18 +1348,20 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       // Generate cuts and add them to the cut pool
       cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list);
-      settings_.log.printf("Generated cuts\n");
 
       // Score the cuts
       cut_pool.score_cuts(root_relax_soln_.x);
-      settings_.log.printf("Scored cuts\n");
       // Get the best cuts from the cut pool
       csr_matrix_t<i_t, f_t> cuts_to_add(0, original_lp_.num_cols, 0);
       std::vector<f_t> cut_rhs;
       std::vector<cut_type_t> cut_types;
       i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
-      settings_.log.printf("Got best cuts\n");
-      print_cut_types(cut_types, settings_);
+      if (num_cuts == 0)
+      {
+        settings_.log.printf("No cuts found\n");
+        break;
+      }
+      //print_cut_types(cut_types, settings_);
 
       cuts_to_add.check_matrix();
 
@@ -1217,9 +1376,26 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
 #if 1
       f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x);
-      settings_.log.printf("Min cut violation %e\n", min_cut_violation);
+      if (min_cut_violation < 1e-6) {
+        settings_.log.printf("Min cut violation %e\n", min_cut_violation);
+      }
 #endif
 
+      // Check against saved solution
+      if (saved_solution.size() > 0) {
+        csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
+        cuts_to_add.to_compressed_col(cuts_to_add_col);
+        std::vector<f_t> Cx(cuts_to_add.m);
+        matrix_vector_multiply(cuts_to_add_col, 1.0, saved_solution, 0.0, Cx);
+        for (i_t k = 0; k < num_cuts; k++) {
+          //printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]);
+          if (Cx[k] > cut_rhs[k] + 1e-6) {
+            printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e\n", k, Cx[k], cut_rhs[k]);
+            exit(1);
+          }
+        }
+      }
+
       // Resolve the LP with the new cuts
       settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
                            num_cuts,
@@ -1340,6 +1516,19 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     }
   }
 
+  if (edge_norms_.size() != original_lp_.num_cols)
+  {
+    edge_norms_.resize(original_lp_.num_cols, -1.0);
+  }
+  for (i_t k = 0; k < original_lp_.num_rows; k++)
+  {
+    const i_t j = basic_list[k];
+    if (edge_norms_[j] < 0.0)
+    {
+      edge_norms_[j] = 1e-4;
+    }
+  }
+
   pc_.resize(original_lp_.num_cols);
   strong_branching<i_t, f_t>(original_lp_,
                              settings_,
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index ccbad335a..0943155a8 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -145,6 +145,7 @@ class branch_and_bound_t {
   lp_problem_t<i_t, f_t> original_lp_;
   std::vector<i_t> new_slacks_;
   std::vector<variable_type_t> var_types_;
+  i_t num_integer_variables_;
 
   // Local lower bounds for each thread
   std::vector<omp_atomic_t<f_t>> local_lower_bounds_;
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 606c46f24..643a1ee33 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -26,8 +26,10 @@ void cut_pool_t<i_t, f_t>::add_cut(cut_type_t cut_type, const sparse_vector_t<i_
     }
   }
 
-  cut_storage_.append_row(cut);
-  settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1);
+  sparse_vector_t<i_t, f_t> cut_squeezed;
+  cut.squeeze(cut_squeezed);
+  cut_storage_.append_row(cut_squeezed);
+  //settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1);
   rhs_storage_.push_back(rhs);
   cut_type_.push_back(cut_type);
   cut_age_.push_back(0);
@@ -124,7 +126,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     if (cut_age_[i] > 0) {
         settings_.log.printf("Adding cut with age %d\n", cut_age_[i]);
     }
-    settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]);
+    //settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]);
 
     best_cuts_.push_back(i);
     scored_cuts_++;
@@ -206,6 +208,7 @@ knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
   for (i_t i = 0; i < lp.num_rows; i++) {
     const i_t row_start = Arow.col_start[i];
     const i_t row_end   = Arow.col_start[i + 1];
+    if (row_end - row_start < 3) { continue; }
     bool is_knapsack    = true;
     f_t sum_pos         = 0.0;
     //printf("i %d ", i);
@@ -527,11 +530,11 @@ void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
   // Generate Gomory Cuts
   generate_gomory_cuts(
     lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
-  settings.log.printf("Generated Gomory cuts\n");
+  //settings.log.printf("Generated Gomory cuts\n");
 
   // Generate Knapsack cuts
   generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
-  settings.log.printf("Generated Knapsack cuts\n");
+  //settings.log.printf("Generated Knapsack cuts\n");
 
  // Generate MIR cuts
  // generate_mir_cuts(lp, settings, Arow, var_types, xstar);
@@ -727,10 +730,10 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
       }
 
       if ((cut_A_distance > cut_B_distance) && A_valid) {
-        printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid);
+        //printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid);
         cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs);
       } else if (B_valid) {
-        printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid);
+        //printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid);
         cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs);
       }
     }
@@ -918,6 +921,10 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
     const i_t col_start = lp.A.col_start[j];
     const i_t i = lp.A.i[col_start];
     slack_rows_[j] = i;
+    if (lp.A.x[col_start] != 1.0) {
+      printf("Initialize: Slack row %d has non-unit coefficient %e for variable %d\n", i, lp.A.x[col_start], j);
+      exit(1);
+    }
   }
 
   needs_complement_ = false;
@@ -1174,7 +1181,7 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
         } else {
             const f_t aij = Arow.x[q];
             if (aij != 1.0) {
-                printf("Slack row %d has non-unit coefficient for variable %d\n", i, j);
+                printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j);
                 exit(1);
             }
         }
@@ -1199,6 +1206,21 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
 
     for (i_t k = 0; k < cut_nz; k++) {
       const i_t j = cut_indices[k];
+
+      // Check for small coefficients
+      const f_t aj = x_workspace_[j];
+      if (std::abs(aj) < 1e-6) {
+        if (aj >= 0.0 && lp.upper[j] < inf) {
+          // Move this to the right-hand side
+          cut_rhs -= aj * lp.upper[j];
+          continue;
+        } else if (aj <= 0.0 && lp.lower[j] > -inf) {
+          cut_rhs += aj * lp.lower[j];
+          continue;
+        } else {
+        }
+      }
+
       cut.i.push_back(j);
       cut.x.push_back(x_workspace_[j]);
     }
@@ -1449,7 +1471,7 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
   }
 
   if (cuts_to_remove.size() > 0) {
-    settings.log.printf("Removing %d cuts\n", cuts_to_remove.size());
+    //settings.log.printf("Removing %d cuts\n", cuts_to_remove.size());
     std::vector<i_t> marked_rows(lp.num_rows, 0);
     for (i_t i : cuts_to_remove) {
       marked_rows[i] = 1;
@@ -1515,7 +1537,7 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
     lp.num_rows   = lp.A.m;
 
     new_slacks.clear();
-    new_slacks.resize(lp.num_cols);
+    new_slacks.reserve(lp.num_cols);
     for (i_t j = 0; j < lp.num_cols; j++) {
         if (new_is_slacks[j]) {
             new_slacks.push_back(j);
@@ -1528,7 +1550,8 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
     y             = new_solution_y;
     z             = new_solution_z;
 
-    settings.log.printf("After removal %d rows %d columns %d nonzeros\n",
+    settings.log.printf("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n",
+                        cuts_to_remove.size(),
                         lp.num_rows,
                         lp.num_cols,
                         lp.A.col_start[lp.A.n]);
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 2ff075c15..34bdfbb3d 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -2014,7 +2014,8 @@ f_t amount_of_perturbation(const lp_problem_t<i_t, f_t>& lp, const std::vector<f
 }
 
 template <typename i_t, typename f_t>
-void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
+void prepare_optimality(i_t info,
+                        const lp_problem_t<i_t, f_t>& lp,
                         const simplex_solver_settings_t<i_t, f_t>& settings,
                         basis_update_mpf_t<i_t, f_t>& ft,
                         const std::vector<f_t>& objective,
@@ -2081,6 +2082,11 @@ void prepare_optimality(const lp_problem_t<i_t, f_t>& lp,
       settings.log.printf("\n");
     }
   }
+
+  if (primal_infeas > settings.primal_tol)
+  {
+    printf("Primal infeasibility %e. Info %d\n", primal_infeas, info);
+  }
 }
 
 template <typename i_t, typename f_t>
@@ -2324,6 +2330,22 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
       }
     }
   } else {
+
+    // Check that none of the basic variables have a steepest edge that is nonpositive
+    for (i_t k = 0; k < m; k++)
+    {
+      const i_t j = basic_list[k];
+      bool fix_needed = false;
+      if (delta_y_steepest_edge[j] <= 0.0)
+      {
+        fix_needed = true;
+        //printf("Basic variable %d has a nonpositive steepest edge %e\n", j, delta_y_steepest_edge[j]);
+        delta_y_steepest_edge[j] = 1e-4;
+      }
+      if (fix_needed) {
+        //printf("Basic variable had nonpositive steepest edge\n");
+      }
+    }
     settings.log.printf("using exisiting steepest edge %e\n",
                         vector_norm2<i_t, f_t>(delta_y_steepest_edge));
   }
@@ -2429,8 +2451,27 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
 #endif
 
+      //primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
+      //  lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+      if (0 && primal_infeasibility > settings.primal_tol) {
+
+        const i_t nz      = infeasibility_indices.size();
+        for (i_t k = 0; k < nz; ++k) {
+          const i_t j              = infeasibility_indices[k];
+          const f_t squared_infeas = squared_infeasibilities[j];
+          const f_t val            = squared_infeas / delta_y_steepest_edge[j];
+          if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) {
+            printf("Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n", iter, j, val, squared_infeas, delta_y_steepest_edge[j]);
+            delta_y_steepest_edge[j] = 1e-4;
+          }
+        }
 
-      phase2::prepare_optimality(lp,
+        //printf("No leaving variable. Updated primal infeasibility: %e\n", primal_infeasibility);
+        continue;
+      }
+
+      phase2::prepare_optimality(0,
+                                 lp,
                                  settings,
                                  ft,
                                  objective,
@@ -2596,7 +2637,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             // Need to reset the objective value, since we have recomputed x
             obj = phase2::compute_perturbed_objective(objective, x);
             if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(1,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
@@ -2633,7 +2675,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
             if (primal_infeasibility <= settings.primal_tol &&
                 orig_dual_infeas <= settings.dual_tol) {
-              phase2::prepare_optimality(lp,
+              phase2::prepare_optimality(2,
+                                         lp,
                                          settings,
                                          ft,
                                          objective,
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index 6c55f5623..d9e37b799 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -213,7 +213,7 @@ bool bound_strengthening(const std::vector<char>& row_sense,
 
       if (new_lb > new_ub + 1e-6) {
         settings.log.printf(
-          "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub);
+          "Iter:: %d, Infeasible variable after update %d, new_lb = %e > %e = new_ub\n", iter, k, new_lb, new_ub);
         return false;
       }
       if (new_lb != old_lb || new_ub != old_ub) {
diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp
index ca3e58041..4d3a1f830 100644
--- a/cpp/src/dual_simplex/pseudo_costs.cpp
+++ b/cpp/src/dual_simplex/pseudo_costs.cpp
@@ -132,6 +132,39 @@ void strong_branch_helper(i_t start,
   }
 }
 
+// Solves the child LP obtained from original_lp by restricting branch_var to
+// [branch_var_lower, branch_var_upper], starting dual_phase2 from root_vstatus and edge_norms
+// with logging disabled and an iteration limit of 200. Returns the child objective when the
+// status is OPTIMAL, ITERATION_LIMIT or CUTOFF, and quiet NaN otherwise. var_types is unused.
+template <typename i_t, typename f_t>
+f_t trial_branching(const lp_problem_t<i_t, f_t>& original_lp,
+                    const simplex_solver_settings_t<i_t, f_t>& settings,
+                    const std::vector<variable_type_t>& var_types,
+                    const std::vector<variable_status_t>& root_vstatus,
+                    const std::vector<f_t>& edge_norms,
+                    i_t branch_var,
+                    f_t branch_var_lower,
+                    f_t branch_var_upper)
+{
+  lp_problem_t child_problem      = original_lp;
+  child_problem.lower[branch_var] = branch_var_lower;
+  child_problem.upper[branch_var] = branch_var_upper;
+  simplex_solver_settings_t<i_t, f_t> child_settings = settings;
+  child_settings.set_log(false);
+  f_t lp_start_time              = tic();
+  child_settings.iteration_limit = 200;  // bounds the work spent on a single trial solve
+  lp_solution_t<i_t, f_t> solution(original_lp.num_rows, original_lp.num_cols);
+  i_t iter                               = 0;
+  std::vector<variable_status_t> vstatus = root_vstatus;  // working copies of the const inputs
+  std::vector<f_t> child_edge_norms      = edge_norms;
+  dual::status_t status                  = dual_phase2(
+    2, 0, lp_start_time, child_problem, child_settings, vstatus, solution, iter, child_edge_norms);
+  const f_t child_obj = compute_objective(child_problem, solution.x);  // evaluated once, reused below
+  printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, static_cast<int>(status), child_obj);
+  const bool usable = status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF;
+  return usable ? child_obj : std::numeric_limits<f_t>::quiet_NaN();
+}
+
 }  // namespace
 
 template <typename i_t, typename f_t>
@@ -310,6 +343,107 @@ i_t pseudo_costs_t<i_t, f_t>::variable_selection(const std::vector<i_t>& fractio
   return branch_var;
 }
 
+// Picks a branching variable from `fractional` by pseudo-cost score. A direction with fewer than
+// reliable_threshold recorded observations is first probed with trial_branching (which receives
+// lp, settings, var_types, vstatus and edge_norms), and the objective change relative to
+// current_obj is folded into the stored pseudo costs. Returns the highest-scoring candidate, or
+// -1 when `fractional` is empty.
+template <typename i_t, typename f_t>
+i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t, f_t>& lp,
+                                                 const simplex_solver_settings_t<i_t, f_t>& settings,
+                                                 const std::vector<variable_type_t>& var_types,
+                                                 const std::vector<variable_status_t>& vstatus,
+                                                 const std::vector<f_t>& edge_norms,
+                                                 const std::vector<i_t>& fractional,
+                                                 const std::vector<f_t>& solution,
+                                                 f_t current_obj,
+                                                 logger_t& log)
+{
+  const i_t num_fractional = fractional.size();
+  if (num_fractional == 0) { return -1; }  // no candidates; fractional[0] must not be read
+
+  // Value-initialized to zero: a direction whose trial solve fails keeps a zero pseudo cost.
+  std::vector<f_t> pseudo_cost_up(num_fractional);
+  std::vector<f_t> pseudo_cost_down(num_fractional);
+  std::vector<f_t> score(num_fractional);
+
+  i_t num_initialized_down;
+  i_t num_initialized_up;
+  f_t pseudo_cost_down_avg;
+  f_t pseudo_cost_up_avg;
+  mutex.lock();
+  initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
+  mutex.unlock();
+
+  log.printf("PC: num initialized down %d up %d avg down %e up %e\n",
+             num_initialized_down,
+             num_initialized_up,
+             pseudo_cost_down_avg,
+             pseudo_cost_up_avg);
+
+  const i_t reliable_threshold = 1;
+
+  for (i_t k = 0; k < num_fractional; k++) {
+    const i_t j = fractional[k];
+    // Down branch: x_j <= floor(x_j). The lock is released before the trial LP solve.
+    mutex.lock();
+    const bool down_reliable = pseudo_cost_num_down[j] >= reliable_threshold;
+    if (down_reliable) { pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; }
+    mutex.unlock();
+    if (!down_reliable) {
+      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j]));
+      if (!std::isnan(obj)) {
+        f_t change_in_obj = obj - current_obj;
+        f_t change_in_x   = solution[j] - std::floor(solution[j]);
+        mutex.lock();
+        pseudo_cost_sum_down[j] += change_in_obj / change_in_x;
+        pseudo_cost_num_down[j]++;
+        pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
+        mutex.unlock();
+      }
+    }
+
+    // Up branch: x_j >= ceil(x_j). Same locking discipline as the down branch.
+    mutex.lock();
+    const bool up_reliable = pseudo_cost_num_up[j] >= reliable_threshold;
+    if (up_reliable) { pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; }
+    mutex.unlock();
+    if (!up_reliable) {
+      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j]);
+      if (!std::isnan(obj)) {
+        f_t change_in_obj = obj - current_obj;
+        f_t change_in_x   = std::ceil(solution[j]) - solution[j];
+        mutex.lock();
+        pseudo_cost_sum_up[j] += change_in_obj / change_in_x;
+        pseudo_cost_num_up[j]++;
+        pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j];
+        mutex.unlock();
+      }
+    }
+
+    constexpr f_t eps = 1e-6;
+    const f_t f_down  = solution[j] - std::floor(solution[j]);
+    const f_t f_up    = std::ceil(solution[j]) - solution[j];
+    score[k] =
+      std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps);
+  }
+
+  // Candidate 0 is the fallback when no score compares greater than -1 (e.g. all scores NaN).
+  i_t select    = 0;
+  f_t max_score = -1;
+  for (i_t k = 0; k < num_fractional; k++) {
+    if (score[k] > max_score) {
+      max_score = score[k];
+      select    = k;
+    }
+  }
+  const i_t branch_var = fractional[select];
+  log.printf(
+    "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]);
+
+  return branch_var;  // the mutex is not held here, so it must not be unlocked again
+}
+
 template <typename i_t, typename f_t>
 void pseudo_costs_t<i_t, f_t>::update_pseudo_costs_from_strong_branching(
   const std::vector<i_t>& fractional, const std::vector<f_t>& root_soln)
diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp
index 799cdc3ff..20b2198e4 100644
--- a/cpp/src/dual_simplex/pseudo_costs.hpp
+++ b/cpp/src/dual_simplex/pseudo_costs.hpp
@@ -47,6 +47,16 @@ class pseudo_costs_t {
                          const std::vector<f_t>& solution,
                          logger_t& log);
 
+  i_t reliable_variable_selection(const lp_problem_t<i_t, f_t>& lp,
+                                  const simplex_solver_settings_t<i_t, f_t>& settings,
+                                  const std::vector<variable_type_t>& var_types,
+                                  const std::vector<variable_status_t>& vstatus,
+                                  const std::vector<f_t>& edge_norms,
+                                  const std::vector<i_t>& fractional,
+                                  const std::vector<f_t>& solution,
+                                  f_t current_obj,
+                                  logger_t& log);
+
   void update_pseudo_costs_from_strong_branching(const std::vector<i_t>& fractional,
                                                  const std::vector<f_t>& root_soln);
   std::vector<f_t> pseudo_cost_sum_up;
diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp
index 3ba981539..a8bd06afa 100644
--- a/cpp/src/dual_simplex/sparse_vector.cpp
+++ b/cpp/src/dual_simplex/sparse_vector.cpp
@@ -233,6 +233,30 @@ f_t sparse_vector_t<i_t, f_t>::find_coefficient(i_t index) const
   return std::numeric_limits<f_t>::quiet_NaN();
 }
 
+// Copies this vector into y, keeping only the entries whose stored value is not exactly
+// zero. y.n is set to this vector's dimension, and y's previous entries are discarded.
+template <typename i_t, typename f_t>
+void sparse_vector_t<i_t, f_t>::squeeze(sparse_vector_t<i_t, f_t>& y) const
+{
+  const i_t num_stored = x.size();
+  // First pass: count the surviving entries so y's storage can be reserved up front.
+  i_t num_kept = 0;
+  for (i_t p = 0; p < num_stored; p++) {
+    if (x[p] != 0.0) { num_kept++; }
+  }
+
+  y.n = n;
+  y.i.reserve(num_kept);
+  y.x.reserve(num_kept);
+  y.i.clear();
+  y.x.clear();
+  for (i_t p = 0; p < num_stored; p++) {
+    if (x[p] == 0.0) { continue; }
+    y.i.push_back(i[p]);
+    y.x.push_back(x[p]);
+  }
+}
+
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
 template class sparse_vector_t<int, double>;
 #endif
diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp
index 3badeed12..c56ebf6d9 100644
--- a/cpp/src/dual_simplex/sparse_vector.hpp
+++ b/cpp/src/dual_simplex/sparse_vector.hpp
@@ -51,6 +51,8 @@ class sparse_vector_t {
   void negate();
   f_t find_coefficient(i_t index) const;
 
+  void squeeze(sparse_vector_t<i_t, f_t>& y) const;
+
   i_t n;
   std::vector<i_t> i;
   std::vector<f_t> x;
diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
index 62fb52fe1..7ea53a73d 100644
--- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh
+++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh
@@ -86,7 +86,7 @@ class sub_mip_recombiner_t : public recombiner_t<i_t, f_t> {
     trivial_presolve(fixed_problem);
     fixed_problem.check_problem_representation(true);
     // brute force rounding threshold is 8
-    const bool run_sub_mip                             = fixed_problem.n_integer_vars > 8;
+    const bool run_sub_mip                             = 0 && fixed_problem.n_integer_vars > 8;
     dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET;
     dual_simplex::mip_solution_t<i_t, f_t> branch_and_bound_solution(1);
     if (run_sub_mip) {

From 1e177432ebf286eaad917c6df441ce0c172f5aad Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 6 Jan 2026 11:58:19 -0800
Subject: [PATCH 19/27] Check for reduced cost variable fixings

---
 cpp/src/dual_simplex/branch_and_bound.cpp | 108 +++++++++++++++++++++-
 cpp/src/dual_simplex/branch_and_bound.hpp |   2 +
 cpp/src/dual_simplex/cuts.cpp             |  53 +++++++++--
 cpp/src/dual_simplex/solution.hpp         |   2 +-
 4 files changed, 154 insertions(+), 11 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 65403404b..d2175f997 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -224,6 +224,24 @@ branch_and_bound_t<i_t, f_t>::branch_and_bound_t(
   }
   printf("num_integer_variables %d\n", num_integer_variables_);
 
+  // Check that each slack column has exactly one nonzero, with coefficient +1 or -1
+  printf("slacks size %ld m %d\n", new_slacks_.size(), original_lp_.num_rows);
+  for (i_t slack : new_slacks_) {
+    const i_t col_start = original_lp_.A.col_start[slack];
+    const i_t col_end = original_lp_.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Slack %d has %d nzs\n", slack, col_len);
+      exit(1);
+    }
+    const i_t i = original_lp_.A.i[col_start];
+    const f_t x = original_lp_.A.x[col_start];
+    if (std::abs(x) != 1.0) {
+      printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x);
+      exit(1);
+    }
+  }
+
   mutex_upper_.lock();
   upper_bound_ = inf;
   mutex_upper_.unlock();
@@ -262,6 +280,84 @@ i_t branch_and_bound_t<i_t, f_t>::get_heap_size()
   return size;
 }
 
+template <typename i_t, typename f_t>
+void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
+{  // Counts and prints the bounds that root reduced costs plus upper_bound would tighten; works on copies.
+  printf("Finding reduced cost fixings\n");
+  mutex_original_lp_.lock();  // held until the explicit unlock() on the last line of this function
+  std::vector<f_t> reduced_costs = root_relax_soln_.z;  // copy of the z vector stored with the root relaxation
+  std::vector<f_t> lower_bounds = original_lp_.lower;  // working copy; tightened lower bounds are written here
+  std::vector<f_t> upper_bounds = original_lp_.upper;  // working copy; tightened upper bounds are written here
+  std::vector<bool> bounds_changed(original_lp_.num_cols, false);  // true for columns whose working bound moved
+  const f_t root_obj = compute_objective(original_lp_, root_relax_soln_.x);
+  const f_t threshold = 1e-3;  // cutoff used by the loop's filter on reduced_costs[j]
+  const f_t weaken = 1e-5;  // shifts a candidate bound outward before floor/ceil is applied
+  i_t num_improved = 0;  // number of working bounds tightened by the loop below
+  i_t num_fixed = 0;
+  for (i_t j = 0; j < original_lp_.num_cols; j++) {
+    //printf("Variable %d type %d reduced cost %e\n", j, var_types_[j], reduced_costs[j]);
+    if (var_types_[j] == variable_type_t::INTEGER && reduced_costs[j] > threshold) {
+      const f_t lower_j = original_lp_.lower[j];
+      const f_t upper_j = original_lp_.upper[j];
+      const f_t abs_gap = upper_bound - root_obj;
+      f_t reduced_cost_upper_bound = upper_j;  // candidates start at the current bounds of column j
+      f_t reduced_cost_lower_bound = lower_j;
+      if (lower_j > -inf && reduced_costs[j] > 0)
+      {
+        const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j];
+        reduced_cost_upper_bound = std::floor(new_upper_bound + weaken);
+        if (reduced_cost_upper_bound < upper_j)
+        {
+          //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound);
+          num_improved++;
+          upper_bounds[j] = reduced_cost_upper_bound;  // recorded in the working copy only
+          bounds_changed[j] = true;
+        }
+      }
+      if (upper_j < inf && reduced_costs[j] < 0)
+      {
+        const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j];
+        reduced_cost_lower_bound = std::ceil(new_lower_bound - weaken);
+        if (reduced_cost_lower_bound > lower_j)
+        {
+          //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound);
+          num_improved++;
+          lower_bounds[j] = reduced_cost_lower_bound;  // recorded in the working copy only
+          bounds_changed[j] = true;
+        }
+      }
+      if (reduced_cost_upper_bound <= reduced_cost_lower_bound)
+      {
+        num_fixed++;
+      }
+    }
+  }
+
+  printf("Reduced costs: Found %d improved bounds and %d fixed variables (%.1f%%)\n", num_improved, num_fixed, 100.0*static_cast<f_t>(num_fixed)/static_cast<f_t>(num_integer_variables_));  // NOTE(review): divides by num_integer_variables_ with no zero check
+
+  if (num_improved > 0) {  // bound strengthening is attempted only when the loop tightened something
+    lp_problem_t<i_t, f_t> new_lp = original_lp_;  // copy of the LP; the tightened bounds are installed on the copy
+    new_lp.lower                  = lower_bounds;
+    new_lp.upper                  = upper_bounds;
+    std::vector<char> row_sense;  // left empty and passed as-is to bound_strengthening
+    csc_matrix_t<i_t, f_t> Arow(original_lp_.num_rows,
+                                original_lp_.num_cols,
+                                original_lp_.A.col_start[original_lp_.num_cols]);
+    original_lp_.A.transpose(Arow);  // presumably fills Arow with the transpose of A -- confirm against csc_matrix_t::transpose
+    bool feasible =
+      bound_strengthening(row_sense, settings_, new_lp, Arow, var_types_, bounds_changed);  // NOTE(review): feasible is never read afterwards
+
+    num_improved = 0;  // counter reused: now counts bounds of new_lp that are tighter than original_lp_'s
+    for (i_t j = 0; j < original_lp_.num_cols; j++) {
+      if (new_lp.lower[j] > original_lp_.lower[j]) { num_improved++; }
+      if (new_lp.upper[j] < original_lp_.upper[j]) { num_improved++; }
+    }
+    printf("Bound strengthening: Found %d improved bounds\n", num_improved);
+  }  // new_lp goes out of scope here; nothing in this function assigns bounds back to original_lp_
+
+  mutex_original_lp_.unlock();  // pairs with the lock() taken at the top
+}
+
 template <typename i_t, typename f_t>
 void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solution)
 {
@@ -319,6 +415,8 @@ void branch_and_bound_t<i_t, f_t>::set_new_solution(const std::vector<f_t>& solu
         user_lower,
         gap.c_str(),
         toc(stats_.start_time));
+
+      find_reduced_cost_fixings(obj);
     } else {
       settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n",
                            compute_user_objective(original_lp_, obj),
@@ -438,6 +536,8 @@ void branch_and_bound_t<i_t, f_t>::repair_heuristic_solutions()
             uncrush_primal_solution(original_problem_, original_lp_, repaired_solution, original_x);
             settings_.solution_callback(original_x, repaired_obj);
           }
+
+          find_reduced_cost_fixings(obj);
         }
 
         mutex_upper_.unlock();
@@ -571,6 +671,8 @@ void branch_and_bound_t<i_t, f_t>::add_feasible_solution(f_t leaf_objective,
                          user_mip_gap<f_t>(obj, lower).c_str(),
                          toc(stats_.start_time));
 
+    find_reduced_cost_fixings(upper_bound_);
+
     send_solution = true;
   }
 
@@ -613,8 +715,8 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
 {
   f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10;
 
-  if (node_ptr->depth >= num_integer_variables_) {
-    printf("Depth %d >= num_integer_variables %d\n", node_ptr->depth, num_integer_variables_);
+  if (node_ptr->depth > num_integer_variables_) {
+    printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_);
     mip_node_t<i_t, f_t>* parent = node_ptr->parent;
     while (parent != nullptr) {
       printf("Parent depth %d\n", parent->depth);
@@ -1478,6 +1580,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
 
       local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_);
 
+      mutex_original_lp_.lock();
       remove_cuts(original_lp_,
                   settings_,
                   Arow,
@@ -1491,6 +1594,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                   basic_list,
                   nonbasic_list,
                   basis_update);
+      mutex_original_lp_.unlock();
 
       fractional.clear();
       num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional);
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 0943155a8..1525b3d7c 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -131,6 +131,8 @@ class branch_and_bound_t {
   f_t get_lower_bound();
   i_t get_heap_size();
 
+  void find_reduced_cost_fixings(f_t upper_bound);
+
   // The main entry routine. Returns the solver status and populates solution with the incumbent.
   mip_status_t solve(mip_solution_t<i_t, f_t>& solution);
 
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 643a1ee33..9c163abd6 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -921,7 +921,7 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
     const i_t col_start = lp.A.col_start[j];
     const i_t i = lp.A.i[col_start];
     slack_rows_[j] = i;
-    if (lp.A.x[col_start] != 1.0) {
+    if (std::abs(lp.A.x[col_start]) != 1.0) {
       printf("Initialize: Slack row %d has non-unit coefficient %e for variable %d\n", i, lp.A.x[col_start], j);
       exit(1);
     }
@@ -1149,30 +1149,43 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
     const f_t cj = cut.x[k];
     if (is_slack_[j]) {
       found_slack = true;
+      const i_t slack_start = lp.A.col_start[j];
+      const i_t slack_end = lp.A.col_start[j + 1];
+      const i_t slack_len = slack_end - slack_start;
+      if (slack_len != 1) {
+        printf("Slack %d has %d nzs in colum\n", j, slack_len);
+        exit(1);
+      }
+      const f_t alpha = lp.A.x[slack_start];
+      if (std::abs(alpha) != 1.0) {
+        printf("Slack %d has non-unit coefficient %e\n", j, alpha);
+        exit(1);
+      }
 
       // Do the substitution
       // Slack variable s_j participates in row i of the constraint matrix
       // Row i is of the form:
-      // sum_{k != j} A(i, k) * x_k + s_j = rhs_i
+      // sum_{k != j} A(i, k) * x_k + alpha * s_j = rhs_i
+      // where alpha = +1/-1
       /// So we have that
-      // s_j = rhs_i - sum_{k != j} A(i, k) * x_k
+      // s_j = (rhs_i - sum_{k != j} A(i, k) * x_k)/alpha
 
       // Our cut is of the form:
       // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs
       // So the cut becomes
-      // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs
+      // sum_{k != j} C(k) * x_k + C(j)/alpha * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs
       // This is equivalent to:
-      // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j) * A(i, h) * x_h >= cut_rhs - C(j) * rhs_i
+      // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j)/alpha * A(i, h) * x_h >= cut_rhs - C(j)/alpha * rhs_i
       const i_t i         = slack_rows_[j];
       //printf("Found slack %d in cut. lo %e up %e. Slack row %d\n", j, lp.lower[j], lp.upper[j], i);
-      cut_rhs -= cj * lp.rhs[i];
+      cut_rhs -= cj * lp.rhs[i] / alpha;
       const i_t row_start = Arow.col_start[i];
       const i_t row_end   = Arow.col_start[i + 1];
       for (i_t q = row_start; q < row_end; q++) {
         const i_t h = Arow.i[q];
         if (h != j) {
           const f_t aih = Arow.x[q];
-          x_workspace_[h] -= cj * aih;
+          x_workspace_[h] -= cj * aih / alpha;
           if (!x_mark_[h]) {
             x_mark_[h] = 1;
             cut_indices.push_back(h);
@@ -1180,7 +1193,7 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
           }
         } else {
             const f_t aij = Arow.x[q];
-            if (aij != 1.0) {
+            if (std::abs(aij)!= 1.0) {
                 printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j);
                 exit(1);
             }
@@ -1302,6 +1315,8 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
   csc_matrix_t<i_t, f_t> new_A_col(lp.num_rows + p, lp.num_cols, 1);
   new_A_row.to_compressed_col(new_A_col);
 
+  printf("slacks size %ld m %d\n", new_slacks.size(), lp.num_rows);
+
   // Add in slacks variables for the new rows
   lp.lower.resize(lp.num_cols + p);
   lp.upper.resize(lp.num_cols + p);
@@ -1320,12 +1335,26 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
     lp.upper[j]     = inf;
     lp.objective[j] = 0.0;
     new_slacks.push_back(j);
+    printf("Added slack %d\n", j);
   }
   settings.log.debug("Done adding slacks\n");
   new_A_col.col_start[lp.num_cols + p] = nz;
   new_A_col.n                          = lp.num_cols + p;
 
   lp.A         = new_A_col;
+
+  // Check that all slack columns have length 1
+  for (i_t slack: new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end = lp.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len);
+      exit(1);
+    }
+  }
+
+
   i_t old_rows = lp.num_rows;
   lp.num_rows += p;
   i_t old_cols = lp.num_cols;
@@ -1449,6 +1478,14 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
   std::vector<i_t> is_slack(lp.num_cols, 0);
   for (i_t j : new_slacks) {
     is_slack[j] = 1;
+    // Check that slack column length is 1
+    const i_t col_start = lp.A.col_start[j];
+    const i_t col_end = lp.A.col_start[j + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Remove cuts: Slack %d has %d nzs in column\n", j, col_len);
+      exit(1);
+    }
   }
 
   for (i_t k = original_rows; k < lp.num_rows; k++) {
diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp
index d1d745cbd..d882e21e2 100644
--- a/cpp/src/dual_simplex/solution.hpp
+++ b/cpp/src/dual_simplex/solution.hpp
@@ -39,7 +39,7 @@ class lp_solution_t {
   std::vector<f_t> x;
   // Dual solution vector. Lagrange multipliers for equality constraints.
   std::vector<f_t> y;
-  // Dual solution vector. Lagrange multipliers for inequality constraints.
+  // Reduced costs
   std::vector<f_t> z;
   f_t objective;
   f_t user_objective;

From 37445485fabc2b386be4df277d4a475c022d4ccb Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 6 Jan 2026 15:17:24 -0800
Subject: [PATCH 20/27] Also try to improve continuous variables with reduced
 cost strengthening

---
 cpp/src/dual_simplex/branch_and_bound.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 58540a53b..7bd32267d 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -315,7 +315,7 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
   i_t num_fixed = 0;
   for (i_t j = 0; j < original_lp_.num_cols; j++) {
     //printf("Variable %d type %d reduced cost %e\n", j, var_types_[j], reduced_costs[j]);
-    if (var_types_[j] == variable_type_t::INTEGER && reduced_costs[j] > threshold) {
+    if (std::abs(reduced_costs[j]) > threshold) {
       const f_t lower_j = original_lp_.lower[j];
       const f_t upper_j = original_lp_.upper[j];
       const f_t abs_gap = upper_bound - root_obj;
@@ -324,7 +324,7 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
       if (lower_j > -inf && reduced_costs[j] > 0)
       {
         const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j];
-        reduced_cost_upper_bound = std::floor(new_upper_bound + weaken);
+        reduced_cost_upper_bound = var_types_[j] == variable_type_t::INTEGER ? std::floor(new_upper_bound + weaken) : new_upper_bound;
         if (reduced_cost_upper_bound < upper_j)
         {
           //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound);
@@ -336,7 +336,7 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
       if (upper_j < inf && reduced_costs[j] < 0)
       {
         const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j];
-        reduced_cost_lower_bound = std::ceil(new_lower_bound - weaken);
+        reduced_cost_lower_bound = var_types_[j] == variable_type_t::INTEGER ? std::ceil(new_lower_bound - weaken) : new_lower_bound;
         if (reduced_cost_lower_bound > lower_j)
         {
           //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound);
@@ -345,7 +345,7 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
           bounds_changed[j] = true;
         }
       }
-      if (reduced_cost_upper_bound <= reduced_cost_lower_bound)
+      if (var_types_[j] == variable_type_t::INTEGER && reduced_cost_upper_bound <= reduced_cost_lower_bound)
       {
         num_fixed++;
       }

From f8e6fbecf94120dcb34b909ff604e156b1861439 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 6 Jan 2026 16:39:48 -0800
Subject: [PATCH 21/27] Fix performance bug in set_quadratic_objective_matrix

---
 .../optimization_problem.cu                   | 109 ++++++++++++------
 1 file changed, 76 insertions(+), 33 deletions(-)

diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu
index 72d75cdc7..69c8b7094 100644
--- a/cpp/src/linear_programming/optimization_problem.cu
+++ b/cpp/src/linear_programming/optimization_problem.cu
@@ -158,45 +158,88 @@ void optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
   // Replace Q with Q + Q^T
   i_t qn    = size_offsets - 1;  // Number of variables
   i_t q_nnz = size_indices;
-  Q_offsets_.resize(qn + 1);
-  std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0);
-  Q_indices_.reserve(2 * q_nnz);
-  Q_values_.reserve(2 * q_nnz);
-
-  // TODO: This is very inefficient for large Q matrices
-  // Build a map from (row,col) to value for Q+Q^T
-  std::map<std::pair<i_t, i_t>, f_t> Q_map;
-  for (i_t row = 0; row < qn; ++row) {
-    size_t start = Q_offsets[row];
-    size_t end   = Q_offsets[row + 1];
-    for (size_t idx = start; idx < end; ++idx) {
-      i_t col = Q_indices[idx];
-      f_t val = Q_values[idx];
-      auto ij = std::make_pair(row, col);
-      auto ji = std::make_pair(col, row);
-      Q_map[ij] += val;
-      Q_map[ji] += val;
+
+
+  // Construct H = Q + Q^T in triplet form first
+  // Then convert the triplet to CSR
+
+  std::vector<i_t> H_i;
+  std::vector<i_t> H_j;
+  std::vector<f_t> H_x;
+
+  H_i.reserve(2 * q_nnz);
+  H_j.reserve(2 * q_nnz);
+  H_x.reserve(2 * q_nnz);
+
+  for (i_t i = 0; i < qn; ++i) {
+    i_t row_start = Q_offsets[i];
+    i_t row_end   = Q_offsets[i + 1];
+    for (i_t p = row_start; p < row_end; ++p) {
+      i_t j = Q_indices[p];
+      f_t x = Q_values[p];
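+      // Add H(i,j). NOTE(review): a diagonal entry is pushed only once, so H(i,i) = Q(i,i), not 2*Q(i,i) as in Q + Q^T -- confirm intended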
+      H_i.push_back(i);
+      H_j.push_back(j);
+      H_x.push_back(x);
+      if (i != j) {
+        // Add H(j,i)
+        H_i.push_back(j);
+        H_j.push_back(i);
+        H_x.push_back(x);
+      }
     }
   }
 
-  // Write map into CSR format (rows are built in key order, so each row's columns are sorted)
-  for (i_t row = 0; row < qn; ++row) {
-    for (auto it = Q_map.lower_bound(std::make_pair(row, 0));
-         it != Q_map.upper_bound(std::make_pair(row, std::numeric_limits<i_t>::max()));
-         ++it) {
-      i_t col = it->first.second;
-      f_t v   = it->second;
-      if (v != 0.0) {
-        Q_indices_.push_back(col);
-        Q_values_.push_back(v);
-        Q_offsets_[row + 1]++;
+  // Convert H to CSR format
+  // Get row counts
+  i_t H_nz = H_x.size();
+  std::vector<i_t> H_row_counts(qn, 0);
+  for (i_t k = 0; k < H_nz; ++k) {
+    H_row_counts[H_i[k]]++;
+  }
+  std::vector<i_t> H_cumulative_counts(qn + 1, 0);
+  for (i_t k = 0; k < qn; ++k) {
+    H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k];
+  }
+  std::vector<i_t> H_row_starts = H_cumulative_counts;
+  std::vector<i_t> H_indices(H_nz);
+  std::vector<f_t> H_values(H_nz);
+  for (i_t k = 0; k < H_nz; ++k) {
+    i_t p = H_cumulative_counts[H_i[k]]++;
+    H_indices[p] = H_j[k];
+    H_values[p] = H_x[k];
+  }
+
+  // H_row_starts, H_indices, H_values are the CSR representation of H
+  // But this contains duplicate entries
+
+  std::vector<i_t> workspace(qn, -1);
+  Q_offsets_.resize(qn + 1);
+  std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0);
+  Q_indices_.resize(H_nz);
+  Q_values_.resize(H_nz);
+  i_t nz = 0;
+  for (i_t i = 0; i < qn; ++i)
+  {
+    i_t q = nz;                                 // row i will start at q
+    const i_t row_start = H_row_starts[i];
+    const i_t row_end = H_row_starts[i + 1];
+    for (i_t p = row_start; p < row_end; ++p) {
+      i_t j = H_indices[p];
+      if (workspace[j] >= q) {
+        Q_values_[workspace[j]] += H_values[p];  // H(i,j) is duplicate
+      } else {
+        workspace[j] = nz;                      // record where column j occurs
+        Q_indices_[nz] = j;                     // keep H(i,j)
+        Q_values_[nz] = H_values[p];
+        nz++;
       }
     }
+    Q_offsets_[i] = q;                          // record start of row i
   }
-  // Convert Q_offsets_new to cumulative sum
-  for (i_t i = 0; i < qn; ++i) {
-    Q_offsets_[i + 1] += Q_offsets_[i];
-  }
+  Q_offsets_[qn] = nz;                          // finalize Q
+  Q_indices_.resize(nz);
+  Q_values_.resize(nz);
 
   // FIX ME:: check for positive semi definite matrix
 }

From 6fc7e990b4489e1a408dbec42982a0011c24a9d5 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Wed, 7 Jan 2026 15:39:51 -0800
Subject: [PATCH 22/27] Fix cut scoring when keeping around old cuts that may
 not be violated

---
 cpp/src/dual_simplex/cuts.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 5bc8b1b86..0687a7afe 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -88,6 +88,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
 {
   const f_t weight_distance = 1.0;
   const f_t weight_orthogonality = 1.0;
+  const f_t min_cut_distance = 1e-4;
   cut_distances_.resize(cut_storage_.m, 0.0);
   cut_norms_.resize(cut_storage_.m, 0.0);
   cut_orthogonality_.resize(cut_storage_.m, 1);
@@ -95,7 +96,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
   for (i_t i = 0; i < cut_storage_.m; i++) {
     f_t violation;
     cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]);
-    cut_scores_[i] = weight_distance * cut_distances_[i]  + weight_orthogonality * cut_orthogonality_[i];
+    cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i]  + weight_orthogonality * cut_orthogonality_[i];
     //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
   }
 
@@ -111,7 +112,6 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
 
   const i_t max_cuts = 2000;
   const f_t min_orthogonality = 0.5;
-  const f_t min_cut_distance = 1e-4;
   best_cuts_.reserve(std::min(max_cuts, cut_storage_.m));
   best_cuts_.clear();
   scored_cuts_ = 0;
@@ -120,6 +120,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     const i_t i = sorted_indices[0];
 
     if (cut_distances_[i] <= min_cut_distance) {
+        //settings_.log.printf("Cut %d distance %e <= %e. Stopping\n", i, cut_distances_[i], min_cut_distance);
         break;
     }
 
@@ -137,7 +138,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
       cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j));
       if (cut_orthogonality_[j] >= min_orthogonality) {
         indices.push_back(j);
-        cut_scores_[j] = weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j];
+        cut_scores_[j] = cut_distances_[j] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j];
         //settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]);
       }
     }
@@ -149,6 +150,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) {
         return cut_scores_[a] > cut_scores_[b];
     });
+    //settings_.log.printf("\t Sorted indicies %d\n", sorted_indices.size());
   }
 }
 

From 67b57c71bdbfaac51f4483042de55267974c31c9 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Wed, 7 Jan 2026 17:51:37 -0800
Subject: [PATCH 23/27] Reenable MIR cuts. Print out types of cut after root
 node.

---
 cpp/src/dual_simplex/basis_updates.cpp        |   2 +-
 cpp/src/dual_simplex/branch_and_bound.cpp     |  20 ++-
 cpp/src/dual_simplex/cuts.cpp                 | 154 ++++++++++++------
 cpp/src/dual_simplex/cuts.hpp                 |   2 +-
 cpp/src/dual_simplex/phase2.cpp               |   2 +-
 cpp/src/dual_simplex/pseudo_costs.cpp         |  15 +-
 .../optimization_problem.cu                   |  46 +++++-
 7 files changed, 171 insertions(+), 70 deletions(-)

diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp
index a5260be26..8ef19f236 100644
--- a/cpp/src/dual_simplex/basis_updates.cpp
+++ b/cpp/src/dual_simplex/basis_updates.cpp
@@ -1169,7 +1169,7 @@ i_t basis_update_mpf_t<i_t, f_t>::append_cuts(const csr_matrix_t<i_t, f_t>& cuts
     i_t V_nz           = 0;
     const f_t zero_tol = 1e-13;
     for (i_t h = 0; h < cuts_basic.m; h++) {
-      sparse_vector_t rhs(WT, h);
+      sparse_vector_t<i_t, f_t> rhs(WT, h);
       scatter_into_workspace(rhs);
       i_t nz = rhs.i.size();
       for (i_t k = num_updates_ - 1; k >= 0; --k) {
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 7bd32267d..c290f5bd4 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1702,7 +1702,9 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   }
 #endif
 
-
+  i_t num_gomory_cuts = 0;
+  i_t num_mir_cuts = 0;
+  i_t num_knapsack_cuts = 0;
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
 #ifdef PRINT_SOLUTION
@@ -1756,7 +1758,15 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         settings_.log.printf("No cuts found\n");
         break;
       }
-      //print_cut_types(cut_types, settings_);
+      for (i_t k = 0; k < cut_types.size(); k++) {
+        if (cut_types[k] == cut_type_t::MIXED_INTEGER_GOMORY) {
+          num_gomory_cuts++;
+        } else if (cut_types[k] == cut_type_t::MIXED_INTEGER_ROUNDING) {
+          num_mir_cuts++;
+        } else if (cut_types[k] == cut_type_t::KNAPSACK) {
+          num_knapsack_cuts++;
+        }
+      }
 
       cuts_to_add.check_matrix();
 
@@ -1914,6 +1924,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
     }
   }
 
+  if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) {
+    settings_.log.printf("Gomory cuts  : %d\n", num_gomory_cuts);
+    settings_.log.printf("MIR cuts     : %d\n", num_mir_cuts);
+    settings_.log.printf("Knapsack cuts: %d\n", num_knapsack_cuts);
+  }
+
   if (edge_norms_.size() != original_lp_.num_cols)
   {
     edge_norms_.resize(original_lp_.num_cols, -1.0);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 0687a7afe..fdce8099c 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -97,7 +97,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     f_t violation;
     cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]);
     cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i]  + weight_orthogonality * cut_orthogonality_[i];
-    //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
+    //settings_.log.printf("Cut %d type %d distance %e violation %e orthogonality %e score %e\n", i, static_cast<int>(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
   }
 
   std::vector<i_t> sorted_indices(cut_storage_.m);
@@ -532,14 +532,13 @@ void cut_generation_t<i_t, f_t>::generate_cuts(const lp_problem_t<i_t, f_t>& lp,
   // Generate Gomory Cuts
   generate_gomory_cuts(
     lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list);
-  //settings.log.printf("Generated Gomory cuts\n");
 
   // Generate Knapsack cuts
   generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
   //settings.log.printf("Generated Knapsack cuts\n");
 
  // Generate MIR cuts
- // generate_mir_cuts(lp, settings, Arow, var_types, xstar);
+  generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar);
 }
 
 template <typename i_t, typename f_t>
@@ -578,54 +577,67 @@ void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>&
   mixed_integer_rounding_cut_t<i_t, f_t> mir(lp.num_cols, settings);
   mir.initialize(lp, new_slacks, xstar);
 
+  std::vector<i_t> slack_map(lp.num_rows);
+  for (i_t slack : new_slacks) {
+    const i_t col_start = lp.A.col_start[slack];
+    const i_t col_end = lp.A.col_start[slack + 1];
+    const i_t col_len = col_end - col_start;
+    if (col_len != 1) {
+      printf("Generate MIR cuts: Slack %d has %d nzs in column\n", slack, col_len);
+      exit(1);
+    }
+    const i_t i = lp.A.i[col_start];
+    slack_map[i] = slack;
+  }
+
   for (i_t i = 0; i < lp.num_rows; i++) {
     sparse_vector_t<i_t, f_t> inequality(Arow, i);
     f_t inequality_rhs = lp.rhs[i];
 
     const i_t row_start = Arow.row_start[i];
     const i_t row_end = Arow.row_start[i + 1];
-    i_t last_slack = -1;
-    for (i_t p = row_start; p < row_end; p++) {
-      const i_t j = Arow.j[p];
-      const f_t a = Arow.x[p];
-      if (var_types[j] == variable_type_t::CONTINUOUS && a == 1.0 && lp.lower[j] == 0.0) {
-        last_slack = j;
-      }
-    }
+    i_t slack = slack_map[i];
 
-    if (last_slack != -1) {
-        // Remove the slack from the equality to get an inequality
-        for (i_t k = 0; k < inequality.i.size(); k++) {
-          const i_t j = inequality.i[k];
-          if (j == last_slack) {
-            inequality.x[k] = 0.0;
-          }
-        }
-
-        // inequaility'*x <= inequality_rhs
-        // But for MIR we need: inequality'*x >= inequality_rhs
-        inequality_rhs *= -1;
-        inequality.negate();
-
-        sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
-        f_t cut_rhs;
-        i_t mir_status = mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
-        if (mir_status == 0) {
-          f_t dot = 0.0;
-          f_t cut_norm = 0.0;
-          for (i_t k = 0; k < cut.i.size(); k++) {
-            const i_t jj = cut.i[k];
-            const f_t aj = cut.x[k];
-            dot += aj * xstar[jj];
-            cut_norm += aj * aj;
-          }
-          if (dot >= cut_rhs) {
-            continue;
-          }
-        }
+    // Remove the slack from the equality to get an inequality
+    for (i_t k = 0; k < inequality.i.size(); k++) {
+      const i_t j = inequality.i[k];
+      if (j == slack) { inequality.x[k] = 0.0; }
+    }
 
-        settings.log.printf("Adding MIR cut %d\n", i);
-        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
+    // inequality'*x <= inequality_rhs
+    // But for MIR we need: inequality'*x >= inequality_rhs
+    inequality_rhs *= -1;
+    inequality.negate();
+
+    sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
+    f_t cut_rhs;
+    i_t mir_status =
+      mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
+    bool add_cut = false;
+    const f_t min_cut_distance = 1e-4;
+    if (mir_status == 0) {
+      if (cut.i.size() == 0) {
+        continue;
+      }
+      mir.substitute_slacks(lp, Arow, cut, cut_rhs);
+      if (cut.i.size() == 0) {
+        continue;
+      }
+       // Check that the cut is violated
+       // The cut is of the form cut'*x >= cut_rhs
+       // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar
+       f_t dot      = cut.dot(xstar);
+       f_t cut_norm = cut.norm2_squared();
+       if (dot < cut_rhs && cut_norm > 0.0) {
+        // Cut is violated. Compute its distance
+         f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm);
+         if (cut_distance > min_cut_distance) {
+           add_cut = true;
+         }
+       }
+    }
+    if (add_cut) {
+      cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
     }
   }
 }
@@ -950,6 +962,19 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
 
     if (lj > -inf) { has_lower_[j] = 1; }
   }
+
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Initialize: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Initialize: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
 }
 
 template <typename i_t, typename f_t>
@@ -962,6 +987,22 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   sparse_vector_t<i_t, f_t>& cut,
   f_t& cut_rhs)
 {
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      printf("num_vars_ %d\n", num_vars_);
+      printf("x_workspace_.size() %ld\n", x_workspace_.size());
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+
   auto f = [](f_t q_1, f_t q_2) -> f_t {
     f_t q_1_hat = q_1 - std::floor(q_1);
     f_t q_2_hat = q_2 - std::floor(q_2);
@@ -1104,6 +1145,19 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   }
 
 
+#if 0
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
   // The new cut is: g'*x >= R
   // But we want to have it in the form h'*x <= b
   cut.sort();
@@ -1111,7 +1165,7 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   cut_rhs = R;
 
   if (cut.i.size() == 0) {
-    settings_.log.printf("No coefficients in cut\n");
+    //settings_.log.printf("MIR: No coefficients in cut\n");
     return -1;
   }
 
@@ -1131,14 +1185,14 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
   std::vector<i_t> cut_indices;
   cut_indices.reserve(cut.i.size());
 
-#if 1
+#if 0
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
-      printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
       exit(1);
     }
     if (x_mark_[j] != 0) {
-      printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
       exit(1);
     }
   }
@@ -1250,14 +1304,14 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
   }
 
 
-#if 1
+#if 0
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
-      printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
       exit(1);
     }
     if (x_mark_[j] != 0) {
-      printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
       exit(1);
     }
   }
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
index 37359f6e8..323dfcfbb 100644
--- a/cpp/src/dual_simplex/cuts.hpp
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -229,7 +229,7 @@ class mixed_integer_gomory_base_inequality_t {
 
  private:
   std::vector<f_t> b_bar_;
-  std::vector<f_t> nonbasic_mark_;
+  std::vector<i_t> nonbasic_mark_;
   std::vector<f_t> x_workspace_;
   std::vector<i_t> x_mark_;
 };
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index a3392aa07..de9077560 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -2456,7 +2456,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
       //primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
       //  lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
-      if (0 && primal_infeasibility > settings.primal_tol) {
+      if (primal_infeasibility > settings.primal_tol) {
 
         const i_t nz      = infeasibility_indices.size();
         for (i_t k = 0; k < nz; ++k) {
diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp
index 2d583213e..d65944f5e 100644
--- a/cpp/src/dual_simplex/pseudo_costs.cpp
+++ b/cpp/src/dual_simplex/pseudo_costs.cpp
@@ -389,11 +389,13 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
   for (i_t k = 0; k < num_fractional; k++) {
     const i_t j = fractional[k];
     mutex.lock();
-    if (pseudo_cost_num_down[j] >= reliable_threshold) {
+    bool down_reliable = pseudo_cost_num_down[j] >= reliable_threshold;
+    mutex.unlock();
+    if (down_reliable) {
+      mutex.lock();
       pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
       mutex.unlock();
     } else {
-      mutex.unlock();
       // Do trial branching on the down branch
       f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j]));
       if (!std::isnan(obj)) {
@@ -402,17 +404,19 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
         mutex.lock();
         pseudo_cost_sum_down[j] += change_in_obj / change_in_x;
         pseudo_cost_num_down[j]++;
-        pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
         mutex.unlock();
+        pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j];
       }
     }
 
     mutex.lock();
-    if (pseudo_cost_num_up[j] >= reliable_threshold) {
+    bool up_reliable = pseudo_cost_num_up[j] >= reliable_threshold;
+    mutex.unlock();
+    if (up_reliable) {
+      mutex.lock();
       pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j];
       mutex.unlock();
     } else {
-      mutex.unlock();
       // Do trial branching on the up branch
       f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j]);
       if (!std::isnan(obj)) {
@@ -446,7 +450,6 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
   log.printf(
     "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]);
 
-  mutex.unlock();
 
   return branch_var;
 }
diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu
index 69c8b7094..ebf617774 100644
--- a/cpp/src/linear_programming/optimization_problem.cu
+++ b/cpp/src/linear_programming/optimization_problem.cu
@@ -181,12 +181,10 @@ void optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
       H_i.push_back(i);
       H_j.push_back(j);
       H_x.push_back(x);
-      if (i != j) {
-        // Add H(j,i)
-        H_i.push_back(j);
-        H_j.push_back(i);
-        H_x.push_back(x);
-      }
+      // Add H(j,i)
+      H_i.push_back(j);
+      H_j.push_back(i);
+      H_x.push_back(x);
     }
   }
 
@@ -202,13 +200,43 @@ void optimization_problem_t<i_t, f_t>::set_quadratic_objective_matrix(
     H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k];
   }
   std::vector<i_t> H_row_starts = H_cumulative_counts;
+  std::vector<i_t> H_map(H_nz);
   std::vector<i_t> H_indices(H_nz);
   std::vector<f_t> H_values(H_nz);
   for (i_t k = 0; k < H_nz; ++k) {
-    i_t p = H_cumulative_counts[H_i[k]]++;
-    H_indices[p] = H_j[k];
-    H_values[p] = H_x[k];
+    const i_t p = H_cumulative_counts[H_i[k]]++;
+    H_map[k] = p;
   }
+  rmm::device_uvector<i_t> d_H_map(H_nz, stream_view_);
+  rmm::device_uvector<i_t> d_H_j(H_nz, stream_view_);
+  rmm::device_uvector<f_t> d_H_x(H_nz, stream_view_);
+  rmm::device_uvector<i_t> d_H_indices(H_nz, stream_view_);
+  rmm::device_uvector<f_t> d_H_values(H_nz, stream_view_);
+
+  raft::copy(d_H_map.data(), H_map.data(), H_nz, stream_view_);
+  raft::copy(d_H_j.data(), H_j.data(), H_nz, stream_view_);
+  raft::copy(d_H_x.data(), H_x.data(), H_nz, stream_view_);
+  stream_view_.synchronize();
+  thrust::for_each_n(rmm::exec_policy(stream_view_),
+                     thrust::make_counting_iterator<i_t>(0),
+                     H_nz,
+                     [span_H_map = cuopt::make_span(d_H_map),
+                      span_H_j = cuopt::make_span(d_H_j),
+                      span_H_indices = cuopt::make_span(d_H_indices)] __device__(i_t k) {
+                      span_H_indices[span_H_map[k]] = span_H_j[k];
+                     });
+  thrust::for_each_n(rmm::exec_policy(stream_view_),
+                     thrust::make_counting_iterator<i_t>(0),
+                     H_nz,
+                     [span_H_map = cuopt::make_span(d_H_map),
+                      span_H_x = cuopt::make_span(d_H_x),
+                      span_H_values = cuopt::make_span(d_H_values)] __device__(i_t k) {
+                      span_H_values[span_H_map[k]] = span_H_x[k];
+                     });
+
+  raft::copy(H_indices.data(), d_H_indices.data(), H_nz, stream_view_);
+  raft::copy(H_values.data(), d_H_values.data(), H_nz, stream_view_);
+  stream_view_.synchronize();
 
   // H_row_starts, H_indices, H_values are the CSR representation of H
   // But this contains duplicate entries

From b70439087b76e9ee13f205464e832a0d7eba8a36 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Thu, 8 Jan 2026 15:41:46 -0800
Subject: [PATCH 24/27] Fix bug in crossover leading to crash on glass-sc. Add
 node_limit and reliability_branching parameters

---
 .../cuopt/linear_programming/constants.h      |  2 +
 .../mip/solver_settings.hpp                   |  2 +
 cpp/src/dual_simplex/branch_and_bound.cpp     | 77 +++++++++++++------
 cpp/src/dual_simplex/crossover.cpp            | 26 ++++---
 cpp/src/dual_simplex/cuts.cpp                 | 14 +++-
 cpp/src/dual_simplex/cuts.hpp                 | 14 ++--
 cpp/src/dual_simplex/phase2.cpp               |  6 +-
 .../dual_simplex/simplex_solver_settings.hpp  |  2 +
 cpp/src/math_optimization/solver_settings.cu  |  2 +
 cpp/src/mip/solver.cu                         |  2 +
 10 files changed, 102 insertions(+), 45 deletions(-)

diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h
index ab78391c9..2a4a781c1 100644
--- a/cpp/include/cuopt/linear_programming/constants.h
+++ b/cpp/include/cuopt/linear_programming/constants.h
@@ -58,6 +58,8 @@
 #define CUOPT_MIP_SCALING                 "mip_scaling"
 #define CUOPT_MIP_PRESOLVE                "mip_presolve"
 #define CUOPT_MIP_CUT_PASSES              "mip_cut_passes"
+#define CUOPT_MIP_NODE_LIMIT              "mip_node_limit"
+#define CUOPT_MIP_RELIABILITY_BRANCHING   "mip_reliability_branching"
 #define CUOPT_SOLUTION_FILE               "solution_file"
 #define CUOPT_NUM_CPU_THREADS             "num_cpu_threads"
 #define CUOPT_NUM_GPUS                    "num_gpus"
diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
index 8f7efdea9..65a4d4bd0 100644
--- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
+++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp
@@ -79,6 +79,8 @@ class mip_solver_settings_t {
   tolerances_t tolerances;
 
   f_t time_limit       = std::numeric_limits<f_t>::infinity();
+  i_t node_limit       = std::numeric_limits<i_t>::max();
+  i_t reliability_branching = -1;
   bool heuristics_only = false;
   i_t num_cpu_threads  = -1;  // -1 means use default number of threads in branch and bound
   i_t max_cut_passes   = 10;  // number of cut passes to make
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index c290f5bd4..d851d723c 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -324,9 +324,10 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
       if (lower_j > -inf && reduced_costs[j] > 0)
       {
         const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j];
-        reduced_cost_upper_bound = var_types_[j] == variable_type_t::INTEGER ? std::floor(new_upper_bound + weaken) : new_upper_bound;
-        if (reduced_cost_upper_bound < upper_j)
-        {
+        reduced_cost_upper_bound  = var_types_[j] == variable_type_t::INTEGER
+                                      ? std::floor(new_upper_bound + weaken)
+                                      : new_upper_bound;
+        if (reduced_cost_upper_bound < upper_j) {
           //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound);
           num_improved++;
           upper_bounds[j] = reduced_cost_upper_bound;
@@ -336,9 +337,10 @@ void branch_and_bound_t<i_t, f_t>::find_reduced_cost_fixings(f_t upper_bound)
       if (upper_j < inf && reduced_costs[j] < 0)
       {
         const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j];
-        reduced_cost_lower_bound = var_types_[j] == variable_type_t::INTEGER ? std::ceil(new_lower_bound - weaken) : new_lower_bound;
-        if (reduced_cost_lower_bound > lower_j)
-        {
+        reduced_cost_lower_bound  = var_types_[j] == variable_type_t::INTEGER
+                                      ? std::ceil(new_lower_bound - weaken)
+                                      : new_lower_bound;
+        if (reduced_cost_lower_bound > lower_j) {
           //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound);
           num_improved++;
           lower_bounds[j] = reduced_cost_lower_bound;
@@ -902,13 +904,20 @@ node_solve_info_t branch_and_bound_t<i_t, f_t>::solve_node(
     } else if (leaf_objective <= upper_bound + abs_fathom_tol) {
       // Choose fractional variable to branch on
 
-#ifdef RELIABLE_BRANCHING
-      const i_t branch_var =
-        pc_.reliable_variable_selection(leaf_problem, lp_settings, var_types_, leaf_vstatus, leaf_edge_norms, leaf_fractional, leaf_solution.x, leaf_objective, lp_settings.log);
-#else
-      const i_t branch_var =
-        pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log);
-#endif
+      i_t branch_var = -1;
+      if (lp_settings.reliability_branching > 0) {
+        branch_var = pc_.reliable_variable_selection(leaf_problem,
+                                                     lp_settings,
+                                                     var_types_,
+                                                     leaf_vstatus,
+                                                     leaf_edge_norms,
+                                                     leaf_fractional,
+                                                     leaf_solution.x,
+                                                     leaf_objective,
+                                                     lp_settings.log);
+      } else {
+        branch_var = pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log);
+      }
 
       assert(leaf_vstatus.size() == leaf_problem.num_cols);
       search_tree.branch(
@@ -1474,7 +1483,15 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
           nonbasic_list.push_back(j);
         }
       }
-
+      if (basic_list.size() != original_lp_.num_rows) {
+        printf("basic_list size %d != m %d\n", basic_list.size(), original_lp_.num_rows);
+        exit(1);
+      }
+      if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) {
+        printf("nonbasic_list size %d != n - m %d\n", nonbasic_list.size(), original_lp_.num_cols - original_lp_.num_rows);
+        exit(1);
+      }
+      root_crossover_settings.max_cut_passes = 3;
       // Populate the basis_update from the crossover vstatus
       basis_update.refactor_basis(
         original_lp_.A, root_crossover_settings, basic_list, nonbasic_list, crossover_vstatus_);
@@ -1482,11 +1499,13 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
       // Set the edge norms to a default value
       edge_norms.resize(original_lp_.num_cols, -1.0);
       set_uninitialized_steepest_edge_norms<i_t, f_t>(edge_norms);
-
+      printf("Using crossover solution\n");
     } else {
+      printf("Using dual simplex solution 1\n");
       root_status = root_status_future.get();
     }
   } else {
+    printf("Using dual simplex solution\n");
     root_status = root_status_future.get();
   }
   return root_status;
@@ -1534,6 +1553,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   basis_update_mpf_t<i_t, f_t> basis_update(original_lp_.num_rows, settings_.refactor_frequency);
   lp_status_t root_status;
   if (!enable_concurrent_lp_root_solve()) {
+    printf("Non concurrent LP root solve\n");
     // RINS/SUBMIP path
     root_status = solve_linear_program_with_advanced_basis(original_lp_,
                                                            exploration_stats_.start_time,
@@ -1545,7 +1565,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
                                                            root_vstatus_,
                                                            edge_norms_);
   } else {
-    root_status = solve_root_relaxation(lp_settings, root_relax_soln_, root_vstatus_, basis_update, basic_list, nonbasic_list, edge_norms_);
+    root_status = solve_root_relaxation(lp_settings,
+                                        root_relax_soln_,
+                                        root_vstatus_,
+                                        basis_update,
+                                        basic_list,
+                                        nonbasic_list,
+                                        edge_norms_);
   }
   exploration_stats_.total_lp_iters      = root_relax_soln_.iterations;
   exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time);
@@ -1705,6 +1731,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
   i_t num_gomory_cuts = 0;
   i_t num_mir_cuts = 0;
   i_t num_knapsack_cuts = 0;
+  i_t cut_pool_size = 0;
   for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) {
     if (num_fractional == 0) {
 #ifdef PRINT_SOLUTION
@@ -1755,7 +1782,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types);
       if (num_cuts == 0)
       {
-        settings_.log.printf("No cuts found\n");
+        //settings_.log.printf("No cuts found\n");
         break;
       }
       for (i_t k = 0; k < cut_types.size(); k++) {
@@ -1779,7 +1806,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       }
 #endif
 
-#if 1
+#if 0
       f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x);
       if (min_cut_violation < 1e-6) {
         settings_.log.printf("Min cut violation %e\n", min_cut_violation);
@@ -1801,8 +1828,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         }
       }
 
+      cut_pool_size = cut_pool.pool_size();
+
       // Resolve the LP with the new cuts
-      settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
+      settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n",
                            num_cuts,
                            cuts_to_add.row_start[cuts_to_add.m],
                            cut_pool.pool_size(),
@@ -1918,16 +1947,18 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         user_lower,
         num_fractional,
         0,
-        exploration_stats_.total_lp_iters.load(),
+        static_cast<f_t>(iter),
         gap.c_str(),
         toc(exploration_stats_.start_time));
     }
   }
 
   if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) {
-    settings_.log.printf("Gomory cuts  : %d\n", num_gomory_cuts);
-    settings_.log.printf("MIR cuts     : %d\n", num_mir_cuts);
-    settings_.log.printf("Knapsack cuts: %d\n", num_knapsack_cuts);
+    settings_.log.printf("Gomory cuts   : %d\n", num_gomory_cuts);
+    settings_.log.printf("MIR cuts      : %d\n", num_mir_cuts);
+    settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts);
+    settings_.log.printf("Cut pool size : %d\n", cut_pool_size);
+    settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]);
   }
 
   if (edge_norms_.size() != original_lp_.num_cols)
diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp
index 23d9a0e8e..b46085b40 100644
--- a/cpp/src/dual_simplex/crossover.cpp
+++ b/cpp/src/dual_simplex/crossover.cpp
@@ -1355,18 +1355,22 @@ crossover_status_t crossover(const lp_problem_t<i_t, f_t>& lp,
       settings.log.debug("Num flips %d\n", num_flips);
       solution = phase1_solution;
       print_crossover_info(lp, settings, vstatus, solution, "Dual phase 1 complete");
-      std::vector<f_t> edge_norms;
-      dual::status_t status = dual_phase2(
-        2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
-      if (toc(start_time) > settings.time_limit) {
-        settings.log.printf("Time limit exceeded\n");
-        return crossover_status_t::TIME_LIMIT;
-      }
-      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-        settings.log.printf("Concurrent halt\n");
-        return crossover_status_t::CONCURRENT_LIMIT;
+      dual_infeas = dual_infeasibility(lp, settings, vstatus, solution.z);
+      dual::status_t status = dual::status_t::NUMERICAL;
+      if (dual_infeas <= settings.dual_tol) {
+        std::vector<f_t> edge_norms;
+        status = dual_phase2(
+          2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms);
+        if (toc(start_time) > settings.time_limit) {
+          settings.log.printf("Time limit exceeded\n");
+          return crossover_status_t::TIME_LIMIT;
+        }
+        if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+          settings.log.printf("Concurrent halt\n");
+          return crossover_status_t::CONCURRENT_LIMIT;
+        }
+        solution.iterations += iter;
       }
-      solution.iterations += iter;
       primal_infeas = primal_infeasibility(lp, settings, vstatus, solution.x);
       dual_infeas   = dual_infeasibility(lp, settings, vstatus, solution.z);
       primal_res    = primal_residual(lp, solution);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index fdce8099c..7632397a3 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -1444,7 +1444,7 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
       return -1;
     }
     in_basis[j] = k;
-    if (j < cuts.n) { C_B_nz += C_col_degree[j]; }
+    if (j < cuts.n) { C_B_nz += C_col_degree[j]; } else { printf("j >= cuts.n %d %d\n", j, cuts.n); }
   }
   settings.log.debug("Done estimating C_B_nz\n");
 
@@ -1466,7 +1466,15 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
   C_B.row_start[p] = nz;
 
   if (nz != C_B_nz) {
-    settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz);
+    settings.log.printf("Add cuts: predicted nz %d actual nz %d\n", C_B_nz, nz);
+    for (i_t i = 0; i < p; i++) {
+      const i_t row_start = cuts.row_start[i];
+      const i_t row_end = cuts.row_start[i + 1];
+      for (i_t q = row_start; q < row_end; q++) {
+        const i_t j = cuts.j[q];
+        printf("C(%d, %d) = %e\n", i, j, C_B.x[q]);
+      }
+    }
     return -1;
   }
   settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz);
@@ -1641,7 +1649,7 @@ void remove_cuts(lp_problem_t<i_t, f_t>& lp,
     y             = new_solution_y;
     z             = new_solution_z;
 
-    settings.log.printf("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n",
+    settings.log.debug("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n",
                         cuts_to_remove.size(),
                         lp.num_rows,
                         lp.num_cols,
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
index 323dfcfbb..e7014e546 100644
--- a/cpp/src/dual_simplex/cuts.hpp
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -78,7 +78,7 @@ class cut_pool_t {
   }
 
   // Add a cut in the form: cut'*x >= rhs.
-  // We expect that the cut is violated by the current relaxation
+  // We expect that the cut is violated by the current relaxation xstar
   // cut'*xstart < rhs
   void add_cut(cut_type_t cut_type, const sparse_vector_t<i_t, f_t>& cut, f_t rhs);
 
@@ -137,10 +137,14 @@ class knapsack_generation_t {
   const std::vector<i_t>& get_knapsack_constraints() const { return knapsack_constraints_; }
 
  private:
-
-  f_t greedy_knapsack_problem(const std::vector<f_t>& values, const std::vector<f_t>& weights, f_t rhs, std::vector<f_t>& solution);
-  f_t solve_knapsack_problem(const std::vector<f_t>& values, const std::vector<f_t>& weights, f_t rhs, std::vector<f_t>& solution);
-
+  f_t greedy_knapsack_problem(const std::vector<f_t>& values,
+                              const std::vector<f_t>& weights,
+                              f_t rhs,
+                              std::vector<f_t>& solution);
+  f_t solve_knapsack_problem(const std::vector<f_t>& values,
+                             const std::vector<f_t>& weights,
+                             f_t rhs,
+                             std::vector<f_t>& solution);
 
   std::vector<i_t> is_slack_;
   std::vector<i_t> knapsack_constraints_;
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index de9077560..88018b74e 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -2083,7 +2083,7 @@ void prepare_optimality(i_t info,
     }
   }
 
-  if (primal_infeas > settings.primal_tol)
+  if (primal_infeas > 10.0*settings.primal_tol)
   {
     printf("Primal infeasibility %e. Info %d\n", primal_infeas, info);
   }
@@ -2465,12 +2465,12 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
           const f_t val            = squared_infeas / delta_y_steepest_edge[j];
           if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) {
             printf("Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n", iter, j, val, squared_infeas, delta_y_steepest_edge[j]);
-            delta_y_steepest_edge[j] = 1e-4;
+            //delta_y_steepest_edge[j] = 1e-4;
           }
         }
 
         //printf("No leaving variable. Updated primal infeasibility: %e\n", primal_infeasibility);
-        continue;
+        //continue;
       }
 
       phase2::prepare_optimality(0,
diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp
index 53ffcf209..7dbf0e1cc 100644
--- a/cpp/src/dual_simplex/simplex_solver_settings.hpp
+++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp
@@ -75,6 +75,7 @@ struct simplex_solver_settings_t {
       max_cut_passes(10),
       random_seed(0),
       inside_mip(0),
+      reliability_branching(-1),
       solution_callback(nullptr),
       heuristic_preemption_callback(nullptr),
       concurrent_halt(nullptr)
@@ -142,6 +143,7 @@ struct simplex_solver_settings_t {
   i_t num_diving_threads;          // number of threads dedicated to diving
   i_t max_cut_passes;              // number of cut passes to make
   i_t inside_mip;  // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node
+  i_t reliability_branching;      // -1 automatic, 0 to disable, >0 to enable reliability branching
   std::function<void(std::vector<f_t>&, f_t)> solution_callback;
   std::function<void(const std::vector<f_t>&, f_t)> node_processed_callback;
   std::function<void()> heuristic_preemption_callback;
diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu
index 93e8df5cd..8ae1fa51b 100644
--- a/cpp/src/math_optimization/solver_settings.cu
+++ b/cpp/src/math_optimization/solver_settings.cu
@@ -88,6 +88,8 @@ solver_settings_t<i_t, f_t>::solver_settings_t() : pdlp_settings(), mip_settings
     {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1},
     {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1},
     {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits<i_t>::max(), 10},
+    {CUOPT_MIP_NODE_LIMIT, &mip_settings.node_limit, 0, std::numeric_limits<i_t>::max(), std::numeric_limits<i_t>::max()},
+    {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits<i_t>::max(), -1},
     {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1},
     {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1}
   };
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu
index 4b39b4619..b6ffd04f1 100644
--- a/cpp/src/mip/solver.cu
+++ b/cpp/src/mip/solver.cu
@@ -163,6 +163,8 @@ solution_t<i_t, f_t> mip_solver_t<i_t, f_t>::run_solver()
 
     // Fill in the settings for branch and bound
     branch_and_bound_settings.time_limit           = timer_.remaining_time();
+    branch_and_bound_settings.node_limit           = context.settings.node_limit;
+    branch_and_bound_settings.reliability_branching = context.settings.reliability_branching;
     branch_and_bound_settings.print_presolve_stats = false;
     branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap;
     branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap;

From 89dafc2cf0daad143c56d1c8bc0fcc212caaf84c Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Fri, 9 Jan 2026 10:45:51 -0800
Subject: [PATCH 25/27] More info on primal infeasibility and trial branching

---
 cpp/src/dual_simplex/branch_and_bound.cpp |  2 +-
 cpp/src/dual_simplex/cuts.cpp             |  6 +-
 cpp/src/dual_simplex/phase2.cpp           | 91 ++++++++++++++++++++++-
 cpp/src/dual_simplex/pseudo_costs.cpp     | 23 ++++--
 4 files changed, 109 insertions(+), 13 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index d851d723c..3402c3c20 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1501,7 +1501,7 @@ lp_status_t branch_and_bound_t<i_t, f_t>::solve_root_relaxation(
       set_uninitialized_steepest_edge_norms<i_t, f_t>(edge_norms);
       printf("Using crossover solution\n");
     } else {
-      printf("Using dual simplex solution 1\n");
+      printf("Using dual simplex solution 1: crossover status %d\n", crossover_status);
       root_status = root_status_future.get();
     }
   } else {
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 7632397a3..2827dfbc1 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -945,7 +945,7 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
   for (i_t j = 0; j < lp.num_cols; j++) {
     if (lp.lower[j] < 0) {
       settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]);
-      exit(1);
+      //exit(1);
     }
     const f_t uj = lp.upper[j];
     const f_t lj = lp.lower[j];
@@ -1444,7 +1444,9 @@ i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
       return -1;
     }
     in_basis[j] = k;
-    if (j < cuts.n) { C_B_nz += C_col_degree[j]; } else { printf("j >= cuts.n %d %d\n", j, cuts.n); }
+    // The cuts are on the original variables. So it is possible that
+    // a slack will be basic and thus not part of the cuts matrix
+    if (j < cuts.n) { C_B_nz += C_col_degree[j]; }
   }
   settings.log.debug("Done estimating C_B_nz\n");
 
diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp
index 88018b74e..413e4718c 100644
--- a/cpp/src/dual_simplex/phase2.cpp
+++ b/cpp/src/dual_simplex/phase2.cpp
@@ -1729,6 +1729,74 @@ f_t dual_infeasibility(const lp_problem_t<i_t, f_t>& lp,
   return sum_infeasible;
 }
 
+
+// Sum the primal bound violations of x and split the total by basis status.
+//
+//   lp              supplies the bounds lp.lower / lp.upper and the column count
+//   settings        supplies primal_tol (and the log used by the optional debug output)
+//   vstatus         basis status of each variable
+//   x               primal values to check against the bounds
+//   basic_infeas    (out) violation summed over variables whose status is BASIC
+//   nonbasic_infeas (out) violation summed over every other variable
+//   basic_over      (out) part of basic_infeas made of violations above settings.primal_tol
+//
+// Returns the total violation over all variables (basic plus nonbasic).
+// Note: the two bound tests are independent, so a variable with lower > upper can count twice.
+template <typename i_t, typename f_t>
+f_t primal_infeasibility_breakdown(const lp_problem_t<i_t, f_t>& lp,
+                                   const simplex_solver_settings_t<i_t, f_t>& settings,
+                                   const std::vector<variable_status_t>& vstatus,
+                                   const std::vector<f_t>& x,
+                                   f_t& basic_infeas,
+                                   f_t& nonbasic_infeas,
+                                   f_t& basic_over)
+{
+  basic_infeas    = 0.0;
+  nonbasic_infeas = 0.0;
+  basic_over      = 0.0;
+  f_t total       = 0;
+
+  // Books a single bound violation of variable `col` into the running sums.
+  // Both the lower- and the upper-bound case below go through this one code path.
+  auto accumulate = [&](i_t col, f_t amount) {
+    if (vstatus[col] != variable_status_t::BASIC) {
+      nonbasic_infeas += amount;
+    } else {
+      basic_infeas += amount;
+      // Only basic violations above the tolerance count toward basic_over
+      if (amount > settings.primal_tol) {
+        basic_over += amount;
+      }
+    }
+    total += amount;
+#ifdef PRIMAL_INFEASIBLE_DEBUG
+    if (amount > settings.primal_tol) {
+      settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n",
+                          col,
+                          amount,
+                          lp.lower[col],
+                          x[col],
+                          lp.upper[col],
+                          static_cast<int>(vstatus[col]));
+    }
+#endif
+  };
+
+  const i_t num_vars = lp.num_cols;
+  for (i_t col = 0; col < num_vars; ++col) {
+    if (x[col] < lp.lower[col]) {
+      // Below the lower bound: the violation is l_j - x_j > 0
+      accumulate(col, -x[col] + lp.lower[col]);
+    }
+    if (x[col] > lp.upper[col]) {
+      // Above the upper bound: the violation is x_j - u_j > 0
+      accumulate(col, x[col] - lp.upper[col]);
+    }
+  }
+
+  return total;
+}
+
 template <typename i_t, typename f_t>
 f_t primal_infeasibility(const lp_problem_t<i_t, f_t>& lp,
                          const simplex_solver_settings_t<i_t, f_t>& settings,
@@ -2015,6 +2083,7 @@ f_t amount_of_perturbation(const lp_problem_t<i_t, f_t>& lp, const std::vector<f
 
 template <typename i_t, typename f_t>
 void prepare_optimality(i_t info,
+                        f_t orig_primal_infeas,
                         const lp_problem_t<i_t, f_t>& lp,
                         const simplex_solver_settings_t<i_t, f_t>& settings,
                         basis_update_mpf_t<i_t, f_t>& ft,
@@ -2037,6 +2106,7 @@ void prepare_optimality(i_t info,
   sol.objective      = compute_objective(lp, sol.x);
   sol.user_objective = compute_user_objective(lp, sol.objective);
   f_t perturbation   = phase2::amount_of_perturbation(lp, objective);
+  f_t orig_perturbation = perturbation;
   if (perturbation > 1e-6 && phase == 2) {
     // Try to remove perturbation
     std::vector<f_t> unperturbed_y(m);
@@ -2085,7 +2155,19 @@ void prepare_optimality(i_t info,
 
   if (primal_infeas > 10.0*settings.primal_tol)
   {
-    printf("Primal infeasibility %e. Info %d\n", primal_infeas, info);
+    f_t basic_infeas = 0.0;
+    f_t nonbasic_infeas = 0.0;
+    f_t basic_over = 0.0;
+    phase2::primal_infeasibility_breakdown(lp, settings, vstatus, x, basic_infeas, nonbasic_infeas, basic_over);
+    printf("Primal infeasibility %e/%e (Basic %e, Nonbasic %e, Basic over %e). Perturbation %e/%e. Info %d\n",
+           primal_infeas,
+           orig_primal_infeas,
+           basic_infeas,
+           nonbasic_infeas,
+           basic_over,
+           orig_perturbation,
+           perturbation,
+           info);
   }
 }
 
@@ -2454,8 +2536,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
 
 #endif
 
-      //primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
-      //  lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
+      primal_infeasibility = phase2::compute_initial_primal_infeasibilities(
+        lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices);
       if (primal_infeasibility > settings.primal_tol) {
 
         const i_t nz      = infeasibility_indices.size();
@@ -2474,6 +2556,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
       }
 
       phase2::prepare_optimality(0,
+                                 primal_infeasibility,
                                  lp,
                                  settings,
                                  ft,
@@ -2641,6 +2724,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             obj = phase2::compute_perturbed_objective(objective, x);
             if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) {
               phase2::prepare_optimality(1,
+                                         primal_infeasibility,
                                          lp,
                                          settings,
                                          ft,
@@ -2679,6 +2763,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase,
             if (primal_infeasibility <= settings.primal_tol &&
                 orig_dual_infeas <= settings.dual_tol) {
               phase2::prepare_optimality(2,
+                                         primal_infeasibility,
                                          lp,
                                          settings,
                                          ft,
diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp
index d65944f5e..f391598b1 100644
--- a/cpp/src/dual_simplex/pseudo_costs.cpp
+++ b/cpp/src/dual_simplex/pseudo_costs.cpp
@@ -141,7 +141,8 @@ f_t trial_branching(const lp_problem_t<i_t, f_t>& original_lp,
                     const std::vector<f_t>& edge_norms,
                     i_t branch_var,
                     f_t branch_var_lower,
-                    f_t branch_var_upper)
+                    f_t branch_var_upper,
+                    i_t& iter)
 {
   lp_problem_t child_problem      = original_lp;
   child_problem.lower[branch_var] = branch_var_lower;
@@ -152,12 +153,11 @@ f_t trial_branching(const lp_problem_t<i_t, f_t>& original_lp,
   f_t lp_start_time              = tic();
   child_settings.iteration_limit = 200;
   lp_solution_t<i_t, f_t> solution(original_lp.num_rows, original_lp.num_cols);
-  i_t iter                               = 0;
   std::vector<variable_status_t> vstatus = root_vstatus;
   std::vector<f_t> child_edge_norms      = edge_norms;
   dual::status_t status                  = dual_phase2(
     2, 0, lp_start_time, child_problem, child_settings, vstatus, solution, iter, child_edge_norms);
-  printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x));
+  //printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x));
 
   if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) {
     return compute_objective(child_problem, solution.x);
@@ -373,6 +373,9 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
   f_t pseudo_cost_down_avg;
   f_t pseudo_cost_up_avg;
 
+  i_t iter = 0;
+  i_t trial_branches = 0;
+
   initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg);
 
   mutex.unlock();
@@ -397,7 +400,10 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
       mutex.unlock();
     } else {
       // Do trial branching on the down branch
-      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j]));
+      i_t trial_iter = 0;
+      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j]), trial_iter);
+      trial_branches++;
+      iter += trial_iter;
       if (!std::isnan(obj)) {
         f_t change_in_obj = obj - current_obj;
         f_t change_in_x = solution[j] - std::floor(solution[j]);
@@ -418,7 +424,10 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
       mutex.unlock();
     } else {
       // Do trial branching on the up branch
-      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j]);
+      i_t trial_iter = 0;
+      f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j], trial_iter);
+      trial_branches++;
+      iter += trial_iter;
       if (!std::isnan(obj)) {
         f_t change_in_obj = obj - current_obj;
         f_t change_in_x = std::ceil(solution[j]) - solution[j];
@@ -447,8 +456,8 @@ i_t pseudo_costs_t<i_t, f_t>::reliable_variable_selection(const lp_problem_t<i_t
     }
   }
 
-  log.printf(
-    "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], score[select]);
+  printf(
+    "pc reliability branching on %d. Value %e. Score %e. Iter %d. Trial branches %d\n", branch_var, solution[branch_var], score[select], iter, trial_branches);
 
 
   return branch_var;

From f11838d1c57fa8152385fbab56429ff7670883e4 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Tue, 13 Jan 2026 15:25:46 -0800
Subject: [PATCH 26/27] Add aggregation for MIR cuts

---
 cpp/src/dual_simplex/branch_and_bound.cpp |  12 +-
 cpp/src/dual_simplex/cuts.cpp             | 751 +++++++++++++++++++++-
 cpp/src/dual_simplex/cuts.hpp             |  87 +++
 cpp/src/dual_simplex/sparse_matrix.cpp    |   6 +-
 cpp/src/dual_simplex/sparse_matrix.hpp    |   2 +-
 5 files changed, 825 insertions(+), 33 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 971ec234b..23e512e1b 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1799,9 +1799,19 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
           num_knapsack_cuts++;
         }
       }
+      print_cut_types(cut_types, settings_);
+      printf("Cut pool size: %d\n", cut_pool.pool_size());
 
-      cuts_to_add.check_matrix();
 
+      if (cuts_to_add.check_matrix() != 0) {
+        printf("Bad cuts matrix\n");
+        for (i_t i = 0; i < static_cast<i_t>(cut_types.size()); ++i)
+        {
+          printf("row %d cut type %d\n", i, cut_types[i]);
+        }
+        exit(-1);
+      }
+      
 #ifdef PRINT_CUTS
       csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
       cuts_to_add.to_compressed_col(cuts_to_add_col);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index 4fd5a8299..e02531eca 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -26,6 +26,24 @@ void cut_pool_t<i_t, f_t>::add_cut(cut_type_t cut_type, const sparse_vector_t<i_
     }
   }
 
+#if 0
+  std::vector<i_t> index(original_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++)
+  {
+    const i_t j = cut.i[p];
+    if (index[j] != 0)
+    {
+      printf("Repeated index %d in cut of size %ld\n", j, cut.i.size());
+      for  (i_t k = 0; k < cut.i.size(); k++)
+      {
+        printf("i %d val %e\n", cut.i[k], cut.x[k]);
+      }
+      exit(1);
+    }
+    index[j] = 1;
+  }
+#endif
+
   sparse_vector_t<i_t, f_t> cut_squeezed;
   cut.squeeze(cut_squeezed);
   cut_storage_.append_row(cut_squeezed);
@@ -97,7 +115,7 @@ void cut_pool_t<i_t, f_t>::score_cuts(std::vector<f_t>& x_relax)
     f_t violation;
     cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]);
     cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i]  + weight_orthogonality * cut_orthogonality_[i];
-    //settings_.log.printf("Cut %d type %d distance %e violation %e orthogonality %e score %e\n", i, static_cast<int>(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
+    //settings_.log.printf("Cut %d type %d distance %+e violation %+e orthogonality %e score %e\n", i, static_cast<int>(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]);
   }
 
   std::vector<i_t> sorted_indices(cut_storage_.m);
@@ -590,13 +608,75 @@ void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>&
     slack_map[i] = slack;
   }
 
+  // Compute initial scores for all rows
+  std::vector<f_t> score(lp.num_rows, 0.0);
   for (i_t i = 0; i < lp.num_rows; i++) {
+    const i_t row_start = Arow.row_start[i];
+    const i_t row_end = Arow.row_start[i + 1];
+
+    const i_t row_nz = row_end - row_start;
+    i_t num_integer_in_row = 0;
+    for (i_t p = row_start; p < row_end; p++)
+    {
+      const i_t j = Arow.j[p];
+      if (var_types[j] == variable_type_t::INTEGER)
+      {
+        num_integer_in_row++;
+      }
+    }
+
+    if (num_integer_in_row == 0)
+    {
+      score[i] = 0.0;
+
+    } else {
+      f_t nz_score = lp.num_cols - row_nz;
+
+      const i_t slack       = slack_map[i];
+      const f_t slack_value = xstar[slack];
+
+      f_t slack_score = -std::log10(1e-16 + std::abs(slack_value));
+
+      const f_t nz_weight    = 1.0;
+      const f_t slack_weight = 1.0;
+
+      score[i] = nz_weight * nz_score + slack_weight * slack_score;
+    }
+  }
+
+  // Sort the rows by score
+  std::vector<i_t> sorted_indices(lp.num_rows);
+  std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
+  std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) {
+    return score[a] > score[b];
+  });
+
+  // These data structures are used to track the rows that have been aggregated
+  // The invariant is that aggregated_rows is empty and aggregated_mark is all zeros
+  // at the beginning of each iteration of the for loop below
+  std::vector<i_t> aggregated_rows;
+  std::vector<i_t> aggregated_mark(lp.num_rows, 0);
+
+  const i_t max_cuts = std::min(lp.num_rows, 1000);
+  for (i_t h = 0; h < max_cuts; h++) {
+    // Get the row with the highest score
+    const i_t i = sorted_indices[0];
+    const f_t max_score = score[i];
+
+    const i_t row_nz = Arow.row_start[i+1] - Arow.row_start[i];
+    const i_t slack = slack_map[i];
+    const f_t slack_value = xstar[slack];
+
+    //printf("MIR %d/%d. row %d nz %d slack %e score %e\n", h, max_cuts, i, row_nz, slack_value,  max_score);
+
+    if (max_score <= 0.0) {
+      break;
+    }
+
     sparse_vector_t<i_t, f_t> inequality(Arow, i);
     f_t inequality_rhs = lp.rhs[i];
 
-    const i_t row_start = Arow.row_start[i];
-    const i_t row_end = Arow.row_start[i + 1];
-    i_t slack = slack_map[i];
+
 
     // Remove the slack from the equality to get an inequality
     for (i_t k = 0; k < inequality.i.size(); k++) {
@@ -609,39 +689,294 @@ void cut_generation_t<i_t, f_t>::generate_mir_cuts(const lp_problem_t<i_t, f_t>&
     inequality_rhs *= -1;
     inequality.negate();
 
+    // Transform the relaxation solution
+    std::vector<f_t> transformed_xstar;
+    mir.relaxation_to_nonnegative(lp, xstar, transformed_xstar);
+
+
     sparse_vector_t<i_t, f_t> cut(lp.num_cols, 0);
     f_t cut_rhs;
-    i_t mir_status =
-      mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
     bool add_cut = false;
-    const f_t min_cut_distance = 1e-4;
-    if (mir_status == 0) {
-      if (cut.i.size() == 0) {
-        continue;
+    i_t num_aggregated = 0;
+    const i_t max_aggregated = 6;
+
+    while (!add_cut && num_aggregated < max_aggregated) {
+      //printf("\t add_cut %d num_aggregated %d nz %ld\n", static_cast<i_t>(add_cut), num_aggregated, inequality.i.size());
+
+      sparse_vector_t<i_t, f_t> transformed_inequality = inequality;
+      f_t transformed_rhs = inequality_rhs;
+
+      mir.to_nonnegative(lp, transformed_inequality, transformed_rhs);
+#if 0
+      for (i_t k = 0; k < transformed_inequality.i.size(); k++)
+      {
+        printf("transformed inequality: i %d x %e\n", transformed_inequality.i[k], transformed_inequality.x[k]);
       }
-      mir.substitute_slacks(lp, Arow, cut, cut_rhs);
-      if (cut.i.size() == 0) {
-        continue;
+#endif
+      std::vector<sparse_vector_t<i_t, f_t>> transformed_cuts;
+      std::vector<f_t> transformed_cut_rhs;
+      std::vector<f_t> transformed_violations;
+
+      //  Generate cut for delta = 1
+      {
+        sparse_vector_t<i_t, f_t> cut_1(lp.num_cols, 0);
+        f_t cut_1_rhs;
+        mir.generate_cut_nonnegative(transformed_inequality, transformed_rhs, var_types, cut_1, cut_1_rhs);
+        f_t cut_1_violation = mir.compute_violation(cut_1, cut_1_rhs, transformed_xstar);
+        if (cut_1_violation > 1e-6)
+        {
+          //printf("Cut 1: Found violation of %e\n", cut_1_violation);
+          transformed_cuts.push_back(cut_1);
+          transformed_cut_rhs.push_back(cut_1_rhs);
+          transformed_violations.push_back(cut_1_violation);
+        } else {
+          //printf("Cut 1: No violation %e\n", cut_1_violation);
+        }
+      }
+
+      // Generate a cut for delta = max { |a_j|, j in I}
+      {
+        f_t max_coeff = 0.0;
+        for (i_t k = 0; k < transformed_inequality.i.size(); k++)
+        {
+          const i_t j = transformed_inequality.i[k];
+          if (var_types[j] == variable_type_t::INTEGER)
+          {
+            const f_t abs_aj = std::abs(transformed_inequality.x[k]);
+            if (abs_aj > max_coeff)
+            {
+              max_coeff = abs_aj;
+            }
+          }
+        }
+        //printf("Cut 2 max_coeff %e size %ld\n", max_coeff, transformed_inequality.i.size());
+
+        if (max_coeff > 1e-6 && max_coeff != 1.0)
+        {
+
+          sparse_vector_t<i_t, f_t> scaled_inequality = transformed_inequality;
+          const i_t nz = transformed_inequality.i.size();
+          for (i_t k = 0; k < nz; k++)
+          {
+            scaled_inequality.x[k] /= max_coeff;
+          }
+          const f_t scaled_rhs = transformed_rhs / max_coeff;
+          sparse_vector_t<i_t, f_t> cut_2(lp.num_cols, 0);
+          f_t cut_2_rhs;
+          mir.generate_cut_nonnegative(scaled_inequality, scaled_rhs, var_types, cut_2, cut_2_rhs);
+          f_t cut_2_violation = mir.compute_violation(cut_2, cut_2_rhs, transformed_xstar);
+          if (cut_2_violation > 1e-6)
+          {
+            //printf("Cut 2: Found violation of %e\n", cut_2_violation);
+            transformed_cuts.push_back(cut_2);
+            transformed_cut_rhs.push_back(cut_2_rhs);
+            transformed_violations.push_back(cut_2_violation);
+          }
+          else {
+            //printf("Cut 2: no violation %e\n", cut_2_violation);
+          }
+
+        }
+      }
+
+      if (!transformed_violations.empty()) {
+        std::vector<i_t> permuted(transformed_violations.size());
+        std::iota(permuted.begin(), permuted.end(), 0);
+        std::sort(permuted.begin(), permuted.end(), [&](i_t i, i_t j) {
+          return transformed_violations[i] > transformed_violations[j];
+        });
+
+        // Get the biggest violation
+        const i_t best_index = permuted[0];
+        //printf("\tBest index %d\n", best_index);
+        f_t max_viol = transformed_violations[best_index];
+        cut          = transformed_cuts[best_index];
+        cut_rhs      = transformed_cut_rhs[best_index];
+
+        if (max_viol > 1e-6) {
+#if 0
+        // Divide by 1/2*violation, 1/4*violation, 1/8*violation
+        sparse_vector_t<i_t, f_t> tmp_cut = best_cut;
+        for (i_t k = 0; k < tmp_cut.i.size(); k++)
+        {
+          tmp_cut.x[k] /= (0.5 * max_viol);
+        }
+        f_t tmp_cut_rhs = best_cut_rhs / (0.5 * max_viol);
+        f_t tmp_viol = mir.compute_violations(tmp_cut, tmp_cut_rhs, transformed_xstar);
+
+        if (tmp_viol > max_viol)
+        {
+          max_viol = tmp_viol;
+          best_cut = tmp_cut;
+
+        }
+#endif
+
+          // Transform back to the original variables
+          mir.to_original(lp, cut, cut_rhs);
+          mir.remove_small_coefficients(lp.lower, lp.upper, cut, cut_rhs);
+          mir.substitute_slacks(lp, Arow, cut, cut_rhs);
+          f_t viol = mir.compute_violation(cut, cut_rhs, xstar);
+          //printf("after slacks and small coeff. Violation %e\n", viol);
+          add_cut = true;
+        }
+      }
+
+#if 0
+      add_cut = generate_single_mir_cut(
+        lp, settings, Arow, var_types, xstar, inequality, inequality_rhs, mir, cut, cut_rhs);
+#endif
+      if (add_cut) {
+        printf("\t adding cut - agg %d\n", num_aggregated);
+        cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
+        break;
+      } else {
+        // Perform aggregation to try and find a cut
+
+        // Find all the continuous variables in the inequality
+        i_t num_continuous    = 0;
+        f_t max_off_bound     = 0.0;
+        i_t max_off_bound_var = -1;
+        for (i_t p = 0; p < inequality.i.size(); p++) {
+          const i_t j = inequality.i[p];
+          if (var_types[j] == variable_type_t::CONTINUOUS) {
+            num_continuous++;
+
+            const f_t off_lower = lp.lower[j] > -inf ? xstar[j] - lp.lower[j] : std::abs(xstar[j]);
+            const f_t off_upper = lp.upper[j] < inf ? lp.upper[j] - xstar[j]  : std::abs(xstar[j]);
+            const f_t off_bound = std::max(off_lower, off_upper);
+            const i_t col_start = lp.A.col_start[j];
+            const i_t col_end   = lp.A.col_start[j+1];
+            const i_t col_len   = col_end - col_start;
+            if (off_bound > max_off_bound && col_len > 1) {
+              max_off_bound     = off_bound;
+              max_off_bound_var = j;
+            }
+          }
+        }
+        //printf("\tnum_continuous %d max_off_bound %e var %d\n", num_continuous, max_off_bound, max_off_bound_var);
+
+        if (num_continuous == 0 || max_off_bound < 1e-6) {
+          break;
+        }
+
+        // Pivot on the variable farthest from its bound (-1 means none found; column 0 is valid)
+        if (max_off_bound_var >= 0) {
+          const i_t col_start = lp.A.col_start[max_off_bound_var];
+          const i_t col_end   = lp.A.col_start[max_off_bound_var + 1];
+          const i_t col_len   = col_end - col_start;
+          if (col_len > 1) {
+            std::vector<i_t> potential_rows;
+            potential_rows.reserve(col_len);
+
+            const f_t threshold = 1e-4;
+            for (i_t q = col_start; q < col_end; q++) {
+              const i_t i   = lp.A.i[q];
+              const f_t val = lp.A.x[q];
+              // Can't use rows that have already been aggregated
+              if (std::abs(val) > threshold && aggregated_mark[i] == 0) { potential_rows.push_back(i); }
+            }
+
+            if (!potential_rows.empty()) {
+              std::sort(potential_rows.begin(), potential_rows.end(), [&](i_t a, i_t b) {
+                return score[a] > score[b];
+              });
+
+              const i_t pivot_row = potential_rows[0];
+
+              sparse_vector_t<i_t, f_t> pivot_row_inequality(Arow, pivot_row);
+              f_t pivot_row_rhs = lp.rhs[pivot_row];
+              //printf("\tCombining with %d\n", pivot_row);
+              mir.combine_rows(lp, Arow, max_off_bound_var, pivot_row_inequality, pivot_row_rhs, inequality, inequality_rhs);
+              aggregated_rows.push_back(pivot_row);
+              aggregated_mark[pivot_row] = 1;
+            } else {
+              //printf("\tno potential rows to aggregate\n");
+              break;
+            }
+          } else {
+            printf("Bad col len\n");
+            exit(1);
+          }
+        }
+        num_aggregated++;   // Always increase so the loop terminates
       }
-       // Check that the cut is violated
-       // The cut is of the form cut'*x >= cut_rhs
-       // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar
-       f_t dot      = cut.dot(xstar);
-       f_t cut_norm = cut.norm2_squared();
-       if (dot < cut_rhs && cut_norm > 0.0) {
-        // Cut is violated. Compute it's distance
-         f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm);
-         if (cut_distance > min_cut_distance) {
-           add_cut = true;
-         }
-       }
     }
+
     if (add_cut) {
-      cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs);
+      // We were successful in generating a cut.
+
+      // Set the score of the aggregated rows to zero
+      for (i_t row : aggregated_rows) {
+        score[row] = 0.0;
+      }
+
+      // Clear the aggregated mark
+      for (i_t row : aggregated_rows) {
+        aggregated_mark[row] = 0;
+      }
+      // Clear the aggregated rows
+      aggregated_rows.clear();
     }
+
+    // Set the score of the current row to zero
+    score[i] = 0.0;
+
+    // Re-sort the rows by score
+    // It's possible this could be made more efficient by storing the rows in a data structure
+    // that allows us to:
+    // 1. Get the row with the best score
+    // 2. Get the row with a nonzero in column j that has the best score
+    // 3. Remove the rows that have been aggregated
+    // 4. Remove the current row
+    std::iota(sorted_indices.begin(), sorted_indices.end(), 0);
+    std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) {
+      return score[a] > score[b];
+    });
   }
 }
 
+// Attempt to derive a single MIR cut from `inequality` and test it against xstar.
+// Returns false if mir.generate_cut reports a nonzero status, if the cut is empty before
+// or after slack substitution, or if xstar is not cut off by more than min_cut_distance.
+// Otherwise returns true with the cut left in (cut, cut_rhs).
+template <typename i_t, typename f_t>
+bool cut_generation_t<i_t, f_t>::generate_single_mir_cut(
+  const lp_problem_t<i_t, f_t>& lp,
+  const simplex_solver_settings_t<i_t, f_t>& settings,
+  csr_matrix_t<i_t, f_t>& Arow,
+  const std::vector<variable_type_t>& var_types,
+  const std::vector<f_t>& xstar,
+  const sparse_vector_t<i_t, f_t>& inequality,
+  f_t inequality_rhs,
+  mixed_integer_rounding_cut_t<i_t, f_t>& mir,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  const f_t min_cut_distance = 1e-4;
+
+  const i_t mir_status =
+    mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs);
+  if (mir_status != 0) { return false; }
+  if (cut.i.size() == 0) { return false; }
+
+  mir.substitute_slacks(lp, Arow, cut, cut_rhs);
+  if (cut.i.size() == 0) { return false; }
+
+  // The cut is of the form cut'*x >= cut_rhs
+  // It is violated by the relaxation solution xstar only if cut'*xstar < cut_rhs
+  const f_t dot      = cut.dot(xstar);
+  const f_t cut_norm = cut.norm2_squared();
+  const bool is_violated = dot < cut_rhs && cut_norm > 0.0;
+  if (!is_violated) {
+    return false;
+  }
+
+  // Cut is violated. Keep it only if its distance from xstar is large enough
+  const f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm);
+  return cut_distance > min_cut_distance;
+}
+
 
 template <typename i_t, typename f_t>
 void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
@@ -925,10 +1260,13 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
     has_upper_.resize(num_vars_, 0);
   }
 
+
   is_slack_.clear();
   is_slack_.resize(num_vars_, 0);
   slack_rows_.clear();
   slack_rows_.resize(num_vars_, 0);
+  bound_info_.clear();
+  bound_info_.resize(num_vars_, 0);
 
   for (i_t j : new_slacks) {
     is_slack_[j] = 1;
@@ -954,13 +1292,18 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
     if (uj < inf) {
       if (uj - xstar_j <= xstar_j - lj) {
         has_upper_[j] = 1;
+        bound_info_[j] = 1;
       } else {
         has_lower_[j] = 1;
+        bound_info_[j] = -1;
       }
       continue;
     }
 
-    if (lj > -inf) { has_lower_[j] = 1; }
+    if (lj > -inf && lj != 0.0) {
+      has_lower_[j] = 1;
+      bound_info_[j] = -1;
+    }
   }
 
 #if 0
@@ -977,6 +1320,230 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::initialize(const lp_problem_t<i_t,
 #endif
 }
 
+// Rewrite an inequality over x (with l_j <= x_j <= u_j) in terms of the nonnegative
+// variables selected by bound_info_:
+//   bound_info_[j] == -1 : v_j = x_j - l_j  (coefficient kept,    rhs -= a_j * l_j)
+//   bound_info_[j] ==  1 : w_j = u_j - x_j  (coefficient negated, rhs -= a_j * u_j)
+// Terms with any other bound_info_ value are left untouched.
+// Both `inequality` and `rhs` are updated in place.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                                                           sparse_vector_t<i_t, f_t>& inequality,
+                                                           f_t& rhs)
+{
+  const i_t num_terms = inequality.i.size();
+  for (i_t p = 0; p < num_terms; ++p) {
+    const i_t col    = inequality.i[p];
+    const f_t coeff  = inequality.x[p];
+    const i_t status = bound_info_[col];
+    if (status == -1) {
+      // Substitute x_j = v_j + l_j:
+      //   a_j x_j = a_j v_j + a_j l_j
+      // so the constant a_j l_j moves to the right-hand side.
+      rhs -= coeff * lp.lower[col];
+      continue;
+    }
+    if (status == 1) {
+      // Substitute x_j = u_j - w_j:
+      //   a_j x_j = a_j u_j - a_j w_j
+      // so the coefficient flips sign and the constant a_j u_j moves
+      // to the right-hand side.
+      inequality.x[p] *= -1.0;
+      rhs -= coeff * lp.upper[col];
+    }
+  }
+}
+
+// Map the relaxation point xstar into the nonnegative variable space:
+//   bound_info_[j] == -1 : entry becomes x_j - l_j
+//   bound_info_[j] ==  1 : entry becomes u_j - x_j
+// All other entries are copied unchanged. The result is written to xstar_nonnegative.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::relaxation_to_nonnegative(
+  const lp_problem_t<i_t, f_t>& lp,
+  const std::vector<f_t>& xstar,
+  std::vector<f_t>& xstar_nonnegative)
+{
+  xstar_nonnegative  = xstar;
+  const i_t num_cols = lp.num_cols;
+  for (i_t col = 0; col < num_cols; ++col) {
+    const i_t status = bound_info_[col];
+    if (status == -1) {
+      // Shift by the lower bound: v_j = x_j - l_j
+      xstar_nonnegative[col] -= lp.lower[col];
+    } else if (status == 1) {
+      // Complement against the upper bound: w_j = u_j - x_j
+      xstar_nonnegative[col] = lp.upper[col] - xstar_nonnegative[col];
+    }
+  }
+}
+
+
+// Rewrite an inequality (>= rhs) stated on the shifted/complemented variables back onto
+// the original variables x, using bound_info_ to pick the substitution per column:
+//   bound_info_[j] == -1 : v_j = x_j - l_j  (coefficient kept,    rhs += d_j * l_j)
+//   bound_info_[j] ==  1 : w_j = u_j - x_j  (coefficient negated, rhs -= d_j * u_j)
+// Both `inequality` and `rhs` are updated in place.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::to_original(const lp_problem_t<i_t, f_t>& lp,
+                                                        sparse_vector_t<i_t, f_t>& inequality,
+                                                        f_t& rhs)
+{
+  const i_t num_terms = inequality.i.size();
+  for (i_t p = 0; p < num_terms; ++p) {
+    const i_t col    = inequality.i[p];
+    const f_t coeff  = inequality.x[p];
+    const i_t status = bound_info_[col];
+    if (status == -1) {
+      // v_j = x_j - l_j  =>  d_j v_j = d_j x_j - d_j l_j
+      // The constant -d_j l_j moves to the right-hand side as +d_j l_j.
+      rhs += coeff * lp.lower[col];
+      continue;
+    }
+    if (status == 1) {
+      // w_j = u_j - x_j  =>  d_j w_j = d_j u_j - d_j x_j
+      // The coefficient flips sign and the constant d_j u_j moves to the
+      // right-hand side as -d_j u_j.
+      inequality.x[p] *= -1.0;
+      rhs -= coeff * lp.upper[col];
+    }
+  }
+}
+
+// Drop coefficients with |d_j| < 1e-6 from a cut of the form sum_j d_j x_j >= cut_rhs.
+// A term is removed only if a finite bound gives an upper bound on d_j x_j:
+//   d_j >= 0, u_j finite: d_j x_j <= d_j u_j;   d_j <= 0, l_j finite: d_j x_j <= d_j l_j
+// The remaining terms then satisfy sum_{k != j} d_k x_k >= cut_rhs - d_j * bound, so the
+// bound contribution is subtracted from cut_rhs. Other small coefficients are kept.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::remove_small_coefficients(
+  const std::vector<f_t>& lower_bounds,
+  const std::vector<f_t>& upper_bounds,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  constexpr f_t small_coefficient_tol = 1e-6;
+  i_t removed                         = 0;
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j  = cut.i[k];
+    const f_t aj = cut.x[k];
+    if (std::abs(aj) >= small_coefficient_tol) { continue; }
+    if (aj >= 0.0 && upper_bounds[j] < inf) {
+      cut_rhs -= aj * upper_bounds[j];
+    } else if (aj <= 0.0 && lower_bounds[j] > -inf) {
+      // Subtract, not add: adding aj * l_j would over-tighten the cut when l_j < 0
+      cut_rhs -= aj * lower_bounds[j];
+    } else {
+      continue;  // No finite bound can absorb this term; keep the coefficient
+    }
+    cut.x[k] = 0.0;
+    removed++;
+  }
+
+  // Physically drop the entries that were zeroed above
+  if (removed > 0) {
+    sparse_vector_t<i_t, f_t> new_cut(cut.n, 0);
+    cut.squeeze(new_cut);
+    cut = new_cut;
+  }
+}
+
+// Build a mixed integer rounding cut from the inequality a'*x >= beta, where every
+// variable is assumed nonnegative and var_types marks the integer ones.
+//   integer column j    : coefficient f(a_j, beta)
+//   continuous column j : coefficient max(a_j, 0)
+//   right-hand side     : R = (beta - floor(beta)) * ceil(beta)
+// The cut is returned in (cut, cut_rhs) as cut'*x >= cut_rhs, after cut.sort().
+// Returns 0 on success and -1 if the cut has no stored coefficients.
+// x_workspace_ and x_mark_ are expected to be all zero on entry and are reset on exit.
+template <typename i_t, typename f_t>
+i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut_nonnegative(
+  const sparse_vector_t<i_t, f_t>& a,
+  f_t beta,
+  const std::vector<variable_type_t>& var_types,
+  sparse_vector_t<i_t, f_t>& cut,
+  f_t& cut_rhs)
+{
+  // Coefficient function for integer variables
+  auto f = [](f_t q_1, f_t q_2) -> f_t {
+    f_t q_1_hat = q_1 - std::floor(q_1);
+    f_t q_2_hat = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+  };
+
+  // Coefficient function for continuous variables
+  auto h = [](f_t q) -> f_t { return std::max(q, static_cast<f_t>(0.0)); };
+
+  std::vector<i_t> cut_indices;
+  cut_indices.reserve(a.i.size());
+  f_t R = (beta - std::floor(beta)) * std::ceil(beta);
+
+  // Accumulate the cut coefficients in the dense workspace. x_mark_ records which
+  // columns are already listed in cut_indices so that each column is listed once.
+  for (i_t k = 0; k < a.i.size(); k++) {
+    const i_t jj = a.i[k];
+    const f_t aj = a.x[k];
+    x_workspace_[jj] += (var_types[jj] == variable_type_t::INTEGER) ? f(aj, beta) : h(aj);
+    if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+      x_mark_[jj] = 1;
+      cut_indices.push_back(jj);
+    }
+  }
+
+  // Gather the accumulated coefficients into the sparse output
+  cut.i.clear();
+  cut.x.clear();
+  cut.i.reserve(cut_indices.size());
+  cut.x.reserve(cut_indices.size());
+  for (i_t jj : cut_indices) {
+    cut.i.push_back(jj);
+    cut.x.push_back(x_workspace_[jj]);
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj]      = 0;
+  }
+
+  // Debug check: the workspace must be clean again before returning
+#if 1
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+  // The new cut is: g'*x >= R (it is kept in >= form)
+  cut.sort();
+  cut_rhs = R;
+
+  // Check for repeated indices: check[j] becomes 1 once column j has been seen,
+  // so meeting a column whose flag is already set means the cut lists it twice.
+  std::vector<i_t> check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++) {
+    if (check[cut.i[p]] != 0) {
+      printf("repeated index in generated cut\n");
+      exit(1);
+    }
+    // Must be an assignment; a `==` comparison here leaves the check inert
+    check[cut.i[p]] = 1;
+  }
+
+  if (cut.i.size() == 0) {
+    //settings_.log.printf("MIR: No coefficients in cut\n");
+    return -1;
+  }
+
+  return 0;
+}
+
 template <typename i_t, typename f_t>
 i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   const sparse_vector_t<i_t, f_t>& a,
@@ -987,7 +1554,7 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   sparse_vector_t<i_t, f_t>& cut,
   f_t& cut_rhs)
 {
-#if 0
+#if 1
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
       printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
@@ -1118,6 +1685,8 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
 
   cut.i.reserve(cut_indices.size());
   cut.x.reserve(cut_indices.size());
+  cut.i.clear();
+  cut.x.clear();
   for (i_t k = 0; k < cut_indices.size(); k++) {
     const i_t jj = cut_indices[k];
 
@@ -1145,7 +1714,7 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
   }
 
 
-#if 0
+#if 1
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
       printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
@@ -1164,6 +1733,18 @@ i_t mixed_integer_rounding_cut_t<i_t, f_t>::generate_cut(
 
   cut_rhs = R;
 
+  // Check for repeated indices: check[j] becomes 1 once column j has been seen,
+  // so meeting a column whose flag is already set means the cut lists it twice.
+  std::vector<i_t> check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++) {
+    if (check[cut.i[p]] != 0) {
+      printf("repeated index in generated cut\n");
+      exit(1);
+    }
+    // Must be an assignment; a `==` comparison here leaves the check inert
+    check[cut.i[p]] = 1;
+  }
+
   if (cut.i.size() == 0) {
     //settings_.log.printf("MIR: No coefficients in cut\n");
     return -1;
@@ -1318,6 +1899,118 @@ void mixed_integer_rounding_cut_t<i_t, f_t>::substitute_slacks(const lp_problem_
 #endif
 }
 
+// Returns cut_rhs - cut'*xstar. For a cut of the form cut'*x >= cut_rhs this value is
+// positive exactly when xstar violates the cut.
+template <typename i_t, typename f_t>
+f_t mixed_integer_rounding_cut_t<i_t, f_t>::compute_violation(const sparse_vector_t<i_t, f_t>& cut,
+                                                              f_t cut_rhs,
+                                                              const std::vector<f_t>& xstar)
+{
+  return cut_rhs - cut.dot(xstar);
+}
+
+// Eliminate variable xj from `inequality` by subtracting a multiple of `pivot_row`:
+//   inequality     <- inequality     - (a_i_j / a_l_j) * pivot_row
+//   inequality_rhs <- inequality_rhs - (a_i_j / a_l_j) * pivot_row_rhs
+// where a_i_j is the coefficient of xj in the inequality and a_l_j is the coefficient
+// of xj in the pivot row (first occurrence). The xj entry itself is dropped from the
+// result. If the pivot row has no nonzero coefficient for xj, nothing is changed.
+// x_workspace_, x_mark_ and indices_ are used as scratch space and are reset before
+// returning.
+// The lp and Arow arguments are not used by this routine.
+template <typename i_t, typename f_t>
+void mixed_integer_rounding_cut_t<i_t, f_t>::combine_rows(const lp_problem_t<i_t, f_t>& lp,
+                                                          csr_matrix_t<i_t, f_t>& Arow,
+                                                          i_t xj,
+                                                          const sparse_vector_t<i_t, f_t>& pivot_row,
+                                                          f_t pivot_row_rhs,
+                                                          sparse_vector_t<i_t, f_t>& inequality,
+                                                          f_t& inequality_rhs)
+{
+#if 1
+  // Debug check: the shared workspace must be clean on entry
+  for (i_t k = 0; k < x_workspace_.size(); k++) {
+    if (x_workspace_[k] != 0.0) {
+      printf("Dirty x_workspace_[%d] = %e\n", k, x_workspace_[k]);
+      exit(1);
+    }
+    if (x_mark_[k] != 0) {
+      printf("Dirty x_mark_[%d] = %d\n", k, x_mark_[k]);
+      exit(1);
+    }
+  }
+#endif
+
+  indices_.clear();
+  indices_.reserve(pivot_row.i.size() + inequality.i.size());
+
+  // Coefficient of xj in the pivot row (first occurrence)
+  f_t pivot_coeff = 0.0;
+  for (i_t p = 0; p < pivot_row.i.size(); p++) {
+    if (pivot_row.i[p] == xj) {
+      pivot_coeff = pivot_row.x[p];
+      break;
+    }
+  }
+  if (pivot_coeff == 0) { return; }
+
+  // Scatter the inequality into the dense workspace, skipping xj but remembering
+  // its coefficient
+  f_t target_coeff = 0.0;
+  for (i_t p = 0; p < inequality.i.size(); p++) {
+    const i_t col = inequality.i[p];
+    if (col == xj) {
+      target_coeff = inequality.x[p];
+      continue;
+    }
+    x_workspace_[col] = inequality.x[p];
+    x_mark_[col]      = 1;
+    indices_.push_back(col);
+  }
+
+  // Multiple of the pivot row to subtract; the rhs is updated with the same multiple
+  const f_t multiplier = target_coeff / pivot_coeff;
+  inequality_rhs -= multiplier * pivot_row_rhs;
+
+  // Subtract the scaled pivot row, registering columns that were not in the inequality
+  for (i_t p = 0; p < pivot_row.i.size(); p++) {
+    const i_t col = pivot_row.i[p];
+    if (col == xj) { continue; }
+    x_workspace_[col] -= multiplier * pivot_row.x[p];
+    if (!x_mark_[col]) {
+      x_mark_[col] = 1;
+      indices_.push_back(col);
+    }
+  }
+
+  // Gather the combined row back into the sparse inequality
+  const i_t nz = indices_.size();
+  inequality.i.resize(nz);
+  inequality.x.resize(nz);
+  for (i_t p = 0; p < nz; p++) {
+    const i_t col   = indices_[p];
+    inequality.i[p] = col;
+    inequality.x[p] = x_workspace_[col];
+  }
+
+  // Report (but do not abort on) repeated indices
+  std::vector<i_t> seen(num_vars_, 0);
+  for (i_t col : inequality.i) {
+    if (seen[col] == 1) {
+      printf("repeated index\n");
+    }
+    seen[col] = 1;
+  }
+
+  // Restore the workspace for the next caller
+  for (i_t col : indices_) {
+    x_workspace_[col] = 0.0;
+    x_mark_[col]      = 0;
+  }
+  indices_.clear();
+}
+
 template <typename i_t, typename f_t>
 i_t add_cuts(const simplex_solver_settings_t<i_t, f_t>& settings,
              const csr_matrix_t<i_t, f_t>& cuts,
diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp
index e7014e546..838ad753c 100644
--- a/cpp/src/dual_simplex/cuts.hpp
+++ b/cpp/src/dual_simplex/cuts.hpp
@@ -137,10 +137,13 @@ class knapsack_generation_t {
   const std::vector<i_t>& get_knapsack_constraints() const { return knapsack_constraints_; }
 
  private:
+  // Generate a heuristic solution to the 0-1 knapsack problem
   f_t greedy_knapsack_problem(const std::vector<f_t>& values,
                               const std::vector<f_t>& weights,
                               f_t rhs,
                               std::vector<f_t>& solution);
+
+  // Solve a 0-1 knapsack problem using dynamic programming
   f_t solve_knapsack_problem(const std::vector<f_t>& values,
                              const std::vector<f_t>& weights,
                              f_t rhs,
@@ -150,6 +153,10 @@ class knapsack_generation_t {
   std::vector<i_t> knapsack_constraints_;
 };
 
+// Forward declaration
+template <typename i_t, typename f_t>
+class mixed_integer_rounding_cut_t;
+
 template <typename i_t, typename f_t>
 class cut_generation_t {
  public:
@@ -174,6 +181,7 @@ class cut_generation_t {
                      const std::vector<i_t>& nonbasic_list);
  private:
 
+  // Generate all mixed integer gomory cuts
   void generate_gomory_cuts(const lp_problem_t<i_t, f_t>& lp,
                             const simplex_solver_settings_t<i_t, f_t>& settings,
                             csr_matrix_t<i_t, f_t>& Arow,
@@ -184,6 +192,7 @@ class cut_generation_t {
                             const std::vector<i_t>& basic_list,
                             const std::vector<i_t>& nonbasic_list);
 
+  // Generate all mixed integer rounding cuts
   void generate_mir_cuts(const lp_problem_t<i_t, f_t>& lp,
                          const simplex_solver_settings_t<i_t, f_t>& settings,
                          csr_matrix_t<i_t, f_t>& Arow,
@@ -191,12 +200,29 @@ class cut_generation_t {
                          const std::vector<variable_type_t>& var_types,
                          const std::vector<f_t>& xstar);
 
+  // Generate all knapsack cuts
   void generate_knapsack_cuts(const lp_problem_t<i_t, f_t>& lp,
                               const simplex_solver_settings_t<i_t, f_t>& settings,
                               csr_matrix_t<i_t, f_t>& Arow,
                               const std::vector<i_t>& new_slacks,
                               const std::vector<variable_type_t>& var_types,
                               const std::vector<f_t>& xstar);
+
+
+  // Generate a single MIR cut
+  bool generate_single_mir_cut(const lp_problem_t<i_t, f_t>& lp,
+                               const simplex_solver_settings_t<i_t, f_t>& settings,
+                               csr_matrix_t<i_t, f_t>& Arow,
+                               const std::vector<variable_type_t>& var_types,
+                               const std::vector<f_t>& xstar,
+                               const sparse_vector_t<i_t, f_t>& inequality,
+                               f_t inequality_rhs,
+                               mixed_integer_rounding_cut_t<i_t, f_t>& mir,
+                               sparse_vector_t<i_t, f_t>& cut,
+                              f_t& cut_rhs);
+
+
+
   cut_pool_t<i_t, f_t>& cut_pool_;
   knapsack_generation_t<i_t, f_t> knapsack_generation_;
 };
@@ -252,10 +278,59 @@ class mixed_integer_rounding_cut_t {
   {
   }
 
+  // We call initialize on each cut pass
+  // it resizes the arrays
   void initialize(const lp_problem_t<i_t, f_t>& lp,
                   const std::vector<i_t>& new_slacks,
                   const std::vector<f_t>& xstar);
 
+
+  // Convert an inequality of the form: sum_j a_j x_j >= beta
+  // with l_j <= x_j <= u_j into the form:
+  // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j
+  // + sum_{j in U} d_j w_j >= delta,
+  // where v_j = x_j - l_j for j in L
+  // and   w_j = u_j - x_j for j in U
+  void to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                      sparse_vector_t<i_t, f_t>& inequality,
+                      f_t& rhs);
+
+  void relaxation_to_nonnegative(const lp_problem_t<i_t, f_t>& lp,
+                                 const std::vector<f_t>& xstar,
+                                 std::vector<f_t>& xstar_nonnegative);
+
+  // Convert an inequality of the form:
+  // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j
+  // + sum_{j in U} d_j w_j >= delta
+  // where v_j = x_j - l_j for j in L
+  // and   w_j = u_j - x_j for j in U
+  // back to an inequality on the original variables
+  // sum_j a_j x_j >= beta
+  void to_original(const lp_problem_t<i_t, f_t>&lp,
+                   sparse_vector_t<i_t, f_t>& inequality,
+                   f_t& rhs);
+
+  // Given a cut of the form sum_j d_j x_j >= beta
+  // with l_j <= x_j <= u_j, try to remove coefficients d_j
+  // with | d_j | < epsilon
+  void remove_small_coefficients(const std::vector<f_t>& lower_bounds,
+                                 const std::vector<f_t>& upper_bounds,
+                                 sparse_vector_t<i_t, f_t>& cut,
+                                 f_t& cut_rhs);
+
+
+  // Given an inequality sum_j a_j x_j >= beta, x_j >= 0, x_j in Z, j in I
+  // generate an MIR cut of the form sum_j d_j x_j >= delta
+  i_t generate_cut_nonnegative(const sparse_vector_t<i_t, f_t>& a,
+                               f_t beta,
+                               const std::vector<variable_type_t>& var_types,
+                               sparse_vector_t<i_t, f_t>& cut,
+                               f_t& cut_rhs);
+
+  f_t compute_violation(const sparse_vector_t<i_t, f_t>& cut,
+                        f_t cut_rhs,
+                        const std::vector<f_t>& xstar);
+
   i_t generate_cut(const sparse_vector_t<i_t, f_t>& a,
                    f_t beta,
                    const std::vector<f_t>& upper_bounds,
@@ -269,6 +344,16 @@ class mixed_integer_rounding_cut_t {
                          sparse_vector_t<i_t, f_t>& cut,
                          f_t& cut_rhs);
 
+  // Combine the pivot row with the inequality to eliminate the variable j
+  // The new inequality is returned in inequality and inequality_rhs
+  void combine_rows(const lp_problem_t<i_t, f_t>& lp,
+                    csr_matrix_t<i_t, f_t>& Arow,
+                    i_t j,
+                    const sparse_vector_t<i_t, f_t>& pivot_row,
+                    f_t pivot_row_rhs,
+                    sparse_vector_t<i_t, f_t>& inequality,
+                    f_t& inequality_rhs);
+
  private:
   i_t num_vars_;
   const simplex_solver_settings_t<i_t, f_t>& settings_;
@@ -278,6 +363,8 @@ class mixed_integer_rounding_cut_t {
   std::vector<i_t> has_upper_;
   std::vector<i_t> is_slack_;
   std::vector<i_t> slack_rows_;
+  std::vector<i_t> indices_;
+  std::vector<i_t> bound_info_;
   bool needs_complement_;
 };
 
diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp
index 07d44f2e9..f717fc352 100644
--- a/cpp/src/dual_simplex/sparse_matrix.cpp
+++ b/cpp/src/dual_simplex/sparse_matrix.cpp
@@ -632,7 +632,7 @@ size_t csc_matrix_t<i_t, f_t>::hash() const
 }
 
 template <typename i_t, typename f_t>
-void csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
+i_t csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
 {
   std::vector<i_t> col_marker(this->n, -1);
   for (i_t i = 0; i < this->m; ++i) {
@@ -640,13 +640,15 @@ void csr_matrix_t<i_t, f_t>::check_matrix(std::string matrix_name) const
     const i_t row_end   = this->row_start[i + 1];
     for (i_t p = row_start; p < row_end; ++p) {
       const i_t j = this->j[p];
-      if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); }
+      if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); return -1;}
       if (col_marker[j] == i) {
         printf("CSR Error (%s) : repeated column index %d in row %d\n", matrix_name.c_str(), j, i);
+        return -1;
       }
       col_marker[j] = i;
     }
   }
+  return 0;
 }
 
 // x <- x + alpha * A(:, j)
diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp
index 7be647270..ccf69dbe6 100644
--- a/cpp/src/dual_simplex/sparse_matrix.hpp
+++ b/cpp/src/dual_simplex/sparse_matrix.hpp
@@ -158,7 +158,7 @@ class csr_matrix_t {
   i_t append_row(const sparse_vector_t<i_t, f_t>& c);
 
   // Ensures no repeated column indices within a row
-  void check_matrix(std::string matrix_name = "") const;
+  i_t check_matrix(std::string matrix_name = "") const;
 
   bool is_diagonal() const
   {

From fb85947ad4d3e78b367a2f604a4b100acf2584b1 Mon Sep 17 00:00:00 2001
From: Christopher Maes <cmaes@nvidia.com>
Date: Wed, 14 Jan 2026 14:11:54 -0800
Subject: [PATCH 27/27] Fix bug in knapsack cuts

---
 cpp/src/dual_simplex/branch_and_bound.cpp |  9 ++++++-
 cpp/src/dual_simplex/cuts.cpp             | 29 +++++++++++++++++------
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index 23e512e1b..25be253bc 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -1750,6 +1750,13 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
       incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x);
       upper_bound_ = root_objective_;
       mutex_upper_.unlock();
+      if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) {
+        settings_.log.printf("Gomory cuts   : %d\n", num_gomory_cuts);
+        settings_.log.printf("MIR cuts      : %d\n", num_mir_cuts);
+        settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts);
+        settings_.log.printf("Cut pool size : %d\n", cut_pool_size);
+        settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]);
+      }
       // We should be done here
       uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x);
       solution.objective          = incumbent_.objective;
@@ -1811,7 +1818,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
         }
         exit(-1);
       }
-      
+
 #ifdef PRINT_CUTS
       csc_matrix_t<i_t, f_t> cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]);
       cuts_to_add.to_compressed_col(cuts_to_add_col);
diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp
index e02531eca..bd5e86033 100644
--- a/cpp/src/dual_simplex/cuts.cpp
+++ b/cpp/src/dual_simplex/cuts.cpp
@@ -228,7 +228,8 @@ knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
   for (i_t i = 0; i < lp.num_rows; i++) {
     const i_t row_start = Arow.row_start[i];
     const i_t row_end   = Arow.row_start[i + 1];
-    if (row_end - row_start < 3) { continue; }
+    const i_t row_len   = row_end - row_start;
+    if (row_len < 3) { continue; }
     bool is_knapsack    = true;
     f_t sum_pos         = 0.0;
     //printf("i %d ", i);
@@ -255,9 +256,14 @@ knapsack_generation_t<i_t, f_t>::knapsack_generation_t(
 
     if (is_knapsack) {
       const f_t beta = lp.rhs[i];
-      printf("Knapsack constraint %d beta %e sum_pos %e\n", i, beta, sum_pos);
       if (std::abs(beta - std::round(beta)) <= settings.integer_tol) {
-        if (beta >= 0.0 && beta <= sum_pos) {
+        if (beta > 0.0 && beta <= sum_pos && std::abs(sum_pos / (row_len - 1) - beta) > 1e-3) {
+          printf("Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n",
+            i,
+            row_len,
+            beta,
+            sum_pos,
+            sum_pos / (row_len - 1));
           knapsack_constraints_.push_back(i);
         }
       }
@@ -322,7 +328,7 @@ i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
   for (i_t k = 0; k < knapsack_inequality.i.size(); k++) {
     const i_t j = knapsack_inequality.i[k];
     if (!is_slack_[j]) {
-      const f_t vj = 1.0 - xstar[j];
+      const f_t vj = std::min(1.0, std::max(0.0,1.0 - xstar[j]));
       objective_constant += vj;
       values[h]  = vj;
       weights[h] = knapsack_inequality.x[k];
@@ -357,6 +363,7 @@ i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
     const i_t j = knapsack_inequality.i[k];
     if (!is_slack_[j]) {
       if (solution[h] == 0.0) {
+        //printf("x%d in cover. relaxation %e\n", j, xstar[j]);
         cut.i.push_back(j);
         cut.x.push_back(-1.0);
       }
@@ -374,7 +381,15 @@ i_t knapsack_generation_t<i_t, f_t>::generate_knapsack_cuts(
   f_t violation = dot - cut_rhs;
   printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation);
 
-  if (violation <= tol) { return -1; }
+  if (violation >= -tol) { return -1; }
+
+#ifdef PRINT_KNAPSACK_CUT
+  printf("knapsack cut (cover %d): \n", cover_size);
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    printf("x%d coeff %g value %g\n", cut.i[k], -cut.x[k], xstar[cut.i[k]]);
+  }
+  printf("cut_rhs %g\n", -cut_rhs);
+#endif
   return 0;
 }
 
@@ -1035,7 +1050,7 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
           f_t dot      = cut_A.dot(xstar);
           f_t cut_norm = cut_A.norm2_squared();
           if (dot >= cut_A_rhs) {
-            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            //settings.log.printf("Cut %d is not violated. Skipping\n", i);
             continue;
           }
           cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm);
@@ -1069,7 +1084,7 @@ void cut_generation_t<i_t, f_t>::generate_gomory_cuts(
           f_t dot      = cut_B.dot(xstar);
           f_t cut_norm = cut_B.norm2_squared();
           if (dot >= cut_B_rhs) {
-            settings.log.printf("Cut %d is not violated. Skipping\n", i);
+            //settings.log.printf("Cut %d is not violated. Skipping\n", i);
             continue;
           }
           cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm);