diff --git a/CMakeLists.txt b/CMakeLists.txt index 5734ab3..0644cae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_FLAGS "-O3 -march=native") +option(ENABLE_MINIAPPS "Also build the miniapps. Default: OFF." OFF) + # Get includes, which declares the `spblas` library add_subdirectory(include) @@ -89,4 +91,8 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) add_subdirectory(examples) add_subdirectory(test) + + if (ENABLE_MINIAPPS) + add_subdirectory(miniapps) + endif() endif() diff --git a/include/spblas/vendor/onemkl_sycl/algorithms.hpp b/include/spblas/vendor/onemkl_sycl/algorithms.hpp index 9f582a8..cafcbcf 100644 --- a/include/spblas/vendor/onemkl_sycl/algorithms.hpp +++ b/include/spblas/vendor/onemkl_sycl/algorithms.hpp @@ -1,5 +1,6 @@ #pragma once +#include "gemm.hpp" #include "spgemm_impl.hpp" #include "spmm_impl.hpp" #include "spmv_impl.hpp" diff --git a/include/spblas/vendor/onemkl_sycl/gemm.hpp b/include/spblas/vendor/onemkl_sycl/gemm.hpp new file mode 100644 index 0000000..e949059 --- /dev/null +++ b/include/spblas/vendor/onemkl_sycl/gemm.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include + +#include +#include + +namespace spblas { + +template + requires __detail::has_mdspan_matrix_base && + __detail::has_mdspan_matrix_base && + __detail::is_matrix_instantiation_of_mdspan_v +void multiply(A&& a, B&& b, C&& c) { + auto a_base = __detail::get_ultimate_base(a); + auto b_base = __detail::get_ultimate_base(a); + + auto alpha_optional = __detail::get_scaling_factor(a, b); + tensor_scalar_t alpha = alpha_optional.value_or(1); + + sycl::queue q(sycl::cpu_selector_v); + + /* + namespace oneapi::mkl::blas::row_major { + sycl::event gemm(sycl::queue &queue, + onemkl::transpose transa, + onemkl::transpose transb, + std::int64_t m, + std::int64_t n, + std::int64_t k, + Ts alpha, + const Ta *a, + std::int64_t lda, + const Tb *b, + std::int64_t ldb, + Ts beta, + Tc *c, + std::int64_t ldc, + const std::vector &dependencies = {}) + } +*/ + + oneapi::mkl::blas::row_major::gemm( + q, oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, + __backend::shape(a)[0], __backend::shape(c)[1], __backend::shape(a)[1], + alpha, a_base.data_handle(), __backend::shape(a)[1], b_base.data_handle(), + __backend::shape(b)[1], 0, c.data_handle(), __backend::shape(c)[1]) + .wait(); +} + +} // namespace spblas diff --git a/miniapps/CMake/miniappsConfig.cmake.in b/miniapps/CMake/miniappsConfig.cmake.in new file mode 100644 index 0000000..6e89b67 --- /dev/null +++ b/miniapps/CMake/miniappsConfig.cmake.in @@ -0,0 +1,22 @@ +include(CMakeFindDependencyMacro) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") + +set(miniapps_VERSION "@miniapps_VERSION@") +set(miniapps_VERSION_MAJOR "@miniapps_VERSION_MAJOR@") +set(miniapps_VERSION_MINOR "@miniapps_VERSION_MINOR@") +set(miniapps_VERSION_PATCH "@miniapps_VERSION_PATCH@") +set(miniapps_VERSION_DEVEL "@miniapps_VERSION_DEVEL@") + +# blas++ +if (NOT blaspp_DIR) + set(blaspp_DIR @blaspp_DIR@) +endif() +find_dependency(blaspp) + +# lapack++ +if (NOT lapackpp_DIR) + set(lapackpp_DIR @lapackpp_DIR@) +endif() +find_dependency(lapackpp) + +include(miniapps) diff --git a/miniapps/CMake/miniappsConfigVersion.cmake.in b/miniapps/CMake/miniappsConfigVersion.cmake.in new file mode 100644 index 0000000..582661d --- /dev/null +++ b/miniapps/CMake/miniappsConfigVersion.cmake.in @@ -0,0 +1,11 @@ +set(PACKAGE_VERSION "@miniapps_VERSION@") + +# Check whether the requested PACKAGE_FIND_VERSION is compatible 
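As a point of reference for the new dense `multiply` overload added in `include/spblas/vendor/onemkl_sycl/gemm.hpp` above, here is a minimal sketch of how its arguments line up with the `oneapi::mkl::blas::row_major::gemm` signature quoted in that file's comment. The sizes, scalars, and `a_ptr`/`b_ptr`/`c_ptr` names are placeholders, and the pointers are assumed to be USM allocations reachable from the queue.

```cpp
// Sketch (not part of the patch): argument mapping for C = alpha * A * B with
// contiguous row-major operands, i.e. A is m x k, B is k x n, C is m x n, and
// each leading dimension equals that matrix's number of columns.
std::int64_t m = 0, n = 0, k = 0;        // placeholder sizes
double alpha = 1.0, beta = 0.0;          // placeholder scalars
double *a_ptr = nullptr, *b_ptr = nullptr, *c_ptr = nullptr;  // placeholder USM pointers

sycl::queue q(sycl::cpu_selector_v);
oneapi::mkl::blas::row_major::gemm(
    q, oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans,
    m, n, k, alpha,
    a_ptr, /*lda=*/k,
    b_ptr, /*ldb=*/n,
    beta,
    c_ptr, /*ldc=*/n)
    .wait();
```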
+if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + set(PACKAGE_VERSION_COMPATIBLE TRUE) + if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_EXACT TRUE) + endif() +endif() diff --git a/miniapps/CMake/rl_build_options.cmake b/miniapps/CMake/rl_build_options.cmake new file mode 100644 index 0000000..ed85878 --- /dev/null +++ b/miniapps/CMake/rl_build_options.cmake @@ -0,0 +1,28 @@ +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +option(BUILD_SHARED_LIBS OFF "Configure to build shared or static libraries") + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" + CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +set(SANITIZE_ADDRESS OFF CACHE BOOL "Add address sanitizer flags to the library") + +message(STATUS "Checking for OpenMP ... ") +find_package(OpenMP COMPONENTS CXX) + +set(tmp FALSE) +if (OpenMP_CXX_FOUND) + set(tmp TRUE) +endif() + +set(miniapps_HAS_OpenMP ${tmp} CACHE BOOL "Set if we have a working OpenMP") + +include(GNUInstallDirs) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}") diff --git a/miniapps/CMake/rl_config.cmake b/miniapps/CMake/rl_config.cmake new file mode 100644 index 0000000..8df22e8 --- /dev/null +++ b/miniapps/CMake/rl_config.cmake @@ -0,0 +1,11 @@ + +configure_file(CMake/miniappsConfig.cmake.in + ${CMAKE_INSTALL_LIBDIR}/cmake/miniappsConfig.cmake @ONLY) + +configure_file(CMake/miniappsConfigVersion.cmake.in + ${CMAKE_INSTALL_LIBDIR}/cmake/miniappsConfigVersion.cmake @ONLY) + +install(FILES + ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/cmake/miniappsConfig.cmake + ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/cmake/miniappsConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) diff --git a/miniapps/CMakeLists.txt b/miniapps/CMakeLists.txt new file mode 100644 index 0000000..e931875 --- /dev/null +++ b/miniapps/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 3.24) + +project(miniapps VERSION 0.1.0) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake") + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +option(SPBLAS_MINIAPPS_ENABLE_CQRRPT "Enable the CQRRPT miniapp. Default ON." ON) +option(SPBLAS_MINIAPPS_ENABLE_SOLVERS "Enable the solvers miniapp. Default ON." 
ON) + +enable_testing() + +include(rl_build_options) + +# Find dependencies +find_package(blaspp 2024.10.26 QUIET) +if(NOT blaspp_FOUND) + FetchContent_Declare( + blaspp + GIT_REPOSITORY https://github.com/icl-utk-edu/blaspp + GIT_TAG v2024.10.26) + FetchContent_MakeAvailable(blaspp) +endif() + +set(blas "auto") +set(lapack "auto") + +find_package(lapackpp 2024.10.26 QUIET) +if(NOT lapackpp_FOUND) + FetchContent_Declare( + lapackpp + GIT_REPOSITORY https://github.com/icl-utk-edu/lapackpp + GIT_TAG v2024.10.26) + FetchContent_MakeAvailable(lapackpp) +endif() + +# compile sources +add_subdirectory(include) + +add_subdirectory(test) + +# export the configuration +include(rl_config) diff --git a/miniapps/README.md b/miniapps/README.md new file mode 100644 index 0000000..e69de29 diff --git a/miniapps/include/CMakeLists.txt b/miniapps/include/CMakeLists.txt new file mode 100644 index 0000000..b6f2c82 --- /dev/null +++ b/miniapps/include/CMakeLists.txt @@ -0,0 +1,52 @@ +set(miniapps_libs + lapackpp + blaspp + spblas +) +if (miniapps_HAS_OpenMP) + list(APPEND miniapps_libs OpenMP::OpenMP_CXX) +endif() + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/rl_config.hh.in rl_config.hh) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/rl_config.hh + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/miniapps) + +add_library(miniapps INTERFACE) +target_include_directories(miniapps INTERFACE + $ + $ + $ +) + +set(miniapps_cxx_opts -Wall -Wextra) +if (SANITIZE_ADDRESS) + list(APPEND miniapps_cxx_opts -fsanitize=address) + target_link_options(miniapps INTERFACE -fsanitize=address) +endif() +target_compile_options(miniapps INTERFACE ${miniapps_cxx_opts}) + +target_link_libraries(miniapps INTERFACE ${miniapps_libs}) + +install( + DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + DESTINATION include FILES_MATCHING PATTERN "*.hh" +) + +install( + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DESTINATION include FILES_MATCHING PATTERN "*.hh" +) + +install( + TARGETS miniapps + EXPORT miniapps + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/miniapps + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} +) + +# install( +# EXPORT miniapps +# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake +# EXPORT_LINK_INTERFACE_LIBRARIES +# ) diff --git a/miniapps/include/bicgstab.hh b/miniapps/include/bicgstab.hh new file mode 100644 index 0000000..7d6e751 --- /dev/null +++ b/miniapps/include/bicgstab.hh @@ -0,0 +1,161 @@ +#ifndef MINIAPPS_BICGSTAB_HH +#define MINIAPPS_BICGSTAB_HH + +#include +#include +#include +#include + +#include "krylov.hh" +#include "rl_blaspp.hh" +#include "util.hh" +#include + +namespace miniapps { + +template +class BiCGSTAB : public Krylov { +public: + BiCGSTAB(T eps, int max_iters) : Krylov{eps, max_iters} {} + + /** + * BiCGSTAB or the Bi-Conjugate Gradient-Stabilized is a Krylov subspace + * solver. + * + * Being a generic solver, it is capable of solving general matrices, + * including non-s.p.d matrices. Though, the memory and the computational + * requirement of the BiCGSTAB solver are higher than of its s.p.d solver + * counterpart, it has the capability to solve generic systems. It was + * developed by stabilizing the BiCG method. 
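As a quick orientation for the `apply` interface described in this comment, a minimal usage sketch that mirrors the solver tests added later in this diff. The template arguments, CSR arrays, tolerance, and iteration cap are illustrative placeholders.

```cpp
// values/rowptr/colind/shape/nnz are assumed to describe a CSR matrix, e.g.
// built via miniapps::matrix_data and convert_rowind_to_rowptr as in the tests.
spblas::csr_view<double> a(values, rowptr, colind, shape, nnz);
std::vector<double> b(shape[0], 1.0);  // right-hand side
std::vector<double> x(shape[0], 0.0);  // initial guess

miniapps::BiCGSTAB<double> solver(/*eps=*/1e-10, /*max_iters=*/100);
auto [error, iters] = solver.apply(a, b, x);  // {residual norm, iteration count}
```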
+ * + * We implement the BiCGSTAB variant from Section 2.3.8 of "Templates for t + * e Solution of Linear Systems: Building Blocks for Iterative Methods" + * + * @param[in] A the matrix in CSR format + * @param[in] b the right hand side vector + * @param[out] x the solution vector + */ + std::tuple apply(const spblas::csr_view a, + const std::vector b, + std::vector x) override; +}; + +// ----------------------------------------------------------------------------- +template +std::tuple BiCGSTAB::apply(const spblas::csr_view a, + const std::vector b, + std::vector x) { + int iters = 0; + double error = 1.0; + + // r = dense_b + // prev_rho = rho = omega = alpha = beta = gamma = 1.0 + // rr = v = s = t = z = y = p = 0 + std::vector r(b); + std::vector rr(b.size(), 0.0); + std::vector v(b.size(), 0.0); + std::vector s(b.size(), 0.0); + std::vector t(b.size(), 0.0); + std::vector z(b.size(), 0.0); + std::vector y(b.size(), 0.0); + std::vector p(b.size(), 0.0); + std::vector tmp(b.size(), 0.0); + + double prev_rho, rho, omega, alpha, beta, gamma; + prev_rho = rho = omega = alpha = beta = gamma = 1.0; + + // r = b - Ax + spblas::multiply(a, x, tmp); + blas::axpy(r.size(), -1.0, tmp.data(), 1, r.data(), 1); + // rr = r + rr = r; + while (true) { + iters++; + + // rho = dot(rr, r) + rho = blas::dot(r.size(), rr.data(), 1, r.data(), 1); + + error = blas::nrm2(r.size(), r.data(), 1); + if (iters >= this->max_iters || error < this->eps) { + std::cout << "error = " << error << "\n"; + std::cout << "iters = " << iters << "\n"; + std::cout << "eps = " << this->eps << "\n"; + std::cout << "max_iters = " << this->max_iters << "\n"; + break; + } + + if (prev_rho * omega == 0.0) { + p = r; + } else { + // beta = (rho / prev_rho) * (alpha / omega) + beta = (rho / prev_rho) * (alpha / omega); + // p = r + beta * (p - omega * v) + tmp = p; + blas::axpy(tmp.size(), -omega, v.data(), 1, tmp.data(), 1); + blas::scal(tmp.size(), beta, tmp.data(), 1); + blas::axpy(tmp.size(), 1.0, r.data(), 1, tmp.data(), 1); + p = tmp; + } + + // y = preconditioner * p + y = p; + // v = A * y + spblas::multiply(a, y, v); + // beta = dot(rr, v) + beta = blas::dot(v.size(), rr.data(), 1, v.data(), 1); + if (beta == 0.0) { + s = r; + } else { + // alpha = rho / beta + alpha = rho / beta; + // s = r - alpha * v + tmp = v; + blas::scal(tmp.size(), -alpha, tmp.data(), 1); + blas::axpy(tmp.size(), 1.0, r.data(), 1, tmp.data(), 1); + s = tmp; + } + + error = blas::nrm2(s.size(), s.data(), 1); + if (iters >= this->max_iters || error < this->eps) { + std::cout << "error = " << error << "\n"; + std::cout << "iters = " << iters << "\n"; + std::cout << "eps = " << this->eps << "\n"; + std::cout << "max_iters = " << this->max_iters << "\n"; + blas::axpy(x.size(), alpha, y.data(), 1, x.data(), 1); + break; + } + + // z = preconditioner * s + z = s; + // t = A * z + spblas::multiply(a, z, t); + // gamma = dot(t, s) + gamma = blas::dot(s.size(), t.data(), 1, s.data(), 1); + // beta = dot(t, t) + beta = blas::dot(t.size(), t.data(), 1, t.data(), 1); + // omega = gamma / beta + if (beta == 0.0) { + omega = 0.0; + } else { + omega = gamma / beta; + } + // x = x + alpha * y + omega * z + tmp = z; + blas::scal(tmp.size(), omega, tmp.data(), 1); + blas::axpy(tmp.size(), alpha, y.data(), 1, tmp.data(), 1); + blas::axpy(x.size(), 1.0, tmp.data(), 1, x.data(), 1); + + // r = s - omega * t + tmp = t; + blas::scal(tmp.size(), -omega, tmp.data(), 1); + blas::axpy(tmp.size(), 1.0, s.data(), 1, tmp.data(), 1); + r = tmp; + + std::swap(prev_rho, rho); + } + 
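For readers less familiar with BLAS++, a short refresher on the level-1 routines these solver bodies are built from, together with how one representative update is composed. This is explanatory only and not part of the patch.

```cpp
// Standard BLAS semantics of the BLAS++ calls used above:
//   blas::axpy(n, alpha, x, 1, y, 1)  // y := alpha * x + y
//   blas::scal(n, alpha, x, 1)        // x := alpha * x
//   blas::dot (n, x, 1, y, 1)         // returns x . y
//   blas::nrm2(n, x, 1)               // returns ||x||_2
//
// e.g. "p = r + beta * (p - omega * v)" is composed as:
//   tmp = p; axpy(-omega, v, tmp); scal(beta, tmp); axpy(1.0, r, tmp); p = tmp;
```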
return {error, iters}; +} + +} // namespace miniapps + +#endif // MINIAPPS_BICGSTAB_HH diff --git a/miniapps/include/cg.hh b/miniapps/include/cg.hh new file mode 100644 index 0000000..7dbbea1 --- /dev/null +++ b/miniapps/include/cg.hh @@ -0,0 +1,108 @@ +#ifndef MINIAPPS_CG_HH +#define MINIAPPS_CG_HH + +#include +#include +#include +#include +#include + +#include "krylov.hh" +#include "rl_blaspp.hh" +#include "util.hh" +#include + +namespace miniapps { + +template +class CG : public Krylov { +public: + CG(T eps, int max_iters) : Krylov{eps, max_iters} {} + + /** + * CG or the conjugate gradient method is an iterative type Krylov subspace + * method which is suitable for symmetric positive definite methods. + * + * Though this method performs very well for symmetric positive definite + * matrices, it is in general not suitable for general matrices. + * + * We implement the CG variant from Section 2.3.1 of "Templates for the + * Solution of Linear Systems: Building Blocks for Iterative Methods" + * + * @param[in] A the matrix in CSR format + * @param[in] b the right hand side vector + * @param[out] x the solution vector + */ + std::tuple apply(const spblas::csr_view a, + const std::vector b, + std::vector x) override; +}; + +// ----------------------------------------------------------------------------- +template +std::tuple CG::apply(const spblas::csr_view a, + const std::vector b, std::vector x) { + int iters = 0; + double error = 1.0; + + // r = b + // rho = 0.0 + // prev_rho = 1.0 + // p = q = 0 + std::vector r(b); + std::vector z(r); + std::vector p(b.size(), 0); + std::vector q(b.size(), 0); + std::vector tmp(b.size(), 0); + + double alpha, beta, rho; + alpha = beta = rho = 0.0; + double prev_rho = 1.0; + + // r = b-Ax + spblas::multiply(a, x, tmp); + blas::axpy(r.size(), -1.0, tmp.data(), 1, r.data(), 1); + + while (true) { + // z = preconditioner*r + z = r; + // rho = dot(r, z) + rho = blas::dot(r.size(), r.data(), 1, z.data(), 1); + + iters++; + error = blas::nrm2(r.size(), r.data(), 1); + if (iters >= this->max_iters || error < this->eps) { + std::cout << "error = " << error << "\n"; + std::cout << "iters = " << iters << "\n"; + std::cout << "eps = " << this->eps << "\n"; + std::cout << "max_iters = " << this->max_iters << "\n"; + break; + } + + // beta = rho / prev_rho; + // p = z + beta * p + beta = rho / prev_rho; + tmp = p; + blas::scal(tmp.size(), beta, tmp.data(), 1); + blas::axpy(tmp.size(), 1.0, z.data(), 1, tmp.data(), 1); + p = tmp; + + // q = A * p + spblas::multiply(a, p, q); + + // alpha = rho / dot(p, q) + alpha = rho / blas::dot(q.size(), p.data(), 1, q.data(), 1); + + // x = x + alpha * p + // r = r - alpha * q + blas::axpy(x.size(), alpha, p.data(), 1, x.data(), 1); + blas::axpy(r.size(), -alpha, q.data(), 1, r.data(), 1); + + std::swap(rho, prev_rho); + } + return {error, iters}; +} + +} // namespace miniapps + +#endif // MINIAPPS_CG_HH diff --git a/miniapps/include/cqrrpt.hh b/miniapps/include/cqrrpt.hh new file mode 100644 index 0000000..26e3f32 --- /dev/null +++ b/miniapps/include/cqrrpt.hh @@ -0,0 +1,208 @@ +#ifndef miniapps_cqrrpt_h +#define miniapps_cqrrpt_h + +#include +#include +#include +#include + +#include "rl_blaspp.hh" +#include "rl_lapackpp.hh" +#include "util.hh" +#include + +namespace miniapps { + +template +class CQRRPTalg { +public: + virtual ~CQRRPTalg() {} + + virtual int call(int64_t m, int64_t n, T* A, int64_t lda, T* R, int64_t ldr, + int64_t* J, T d_factor) = 0; +}; + +template +class CQRRPT : public CQRRPTalg { +public: + CQRRPT(T ep, 
int64_t nz) { + eps = ep; + nnz = nz; + } + + /// Computes a QR factorization with column pivots of the form: + /// A[:, J] = QR, + /// where Q and R are of size m-by-k and k-by-n, with rank(A) = k. + /// Detailed description of this algorithm may be found in Section 5.1.2. + /// of "the RandLAPACK book". + /// + /// @param[in] m + /// The number of rows in the matrix A. + /// + /// @param[in] n + /// The number of columns in the matrix A. + /// + /// @param[in] A + /// The m-by-n matrix A, stored in a column-major format. + /// + /// @param[in] d + /// Embedding dimension of a sketch, m >= d >= n. + /// + /// @param[in] R + /// Represents the upper-triangular R factor of QR factorization. + /// On entry, is empty and may not have any space allocated for it. + /// + /// @param[out] A + /// Overwritten by an m-by-k orthogonal Q factor. + /// Matrix is stored explicitly. + /// + /// @param[out] R + /// Stores k-by-n matrix with upper-triangular R factor. + /// Zero entries are not compressed. + /// + /// @param[out] J + /// Stores k integer type pivot index extries. + /// + /// @return = 0: successful exit + /// + /// @return = 1: cholesky factorization failed + /// + + int call(int64_t m, int64_t n, T* A, int64_t lda, T* R, int64_t ldr, + int64_t* J, T d_factor) override; + +public: + T eps; + int64_t nnz; + int64_t rank; +}; + +// ----------------------------------------------------------------------------- +template +int CQRRPT::call(int64_t m, int64_t n, T* A, int64_t lda, T* R, int64_t ldr, + int64_t* J, T d_factor) { + int i; + int64_t k = n; + int64_t d = d_factor * n; + // A constant for initial rank estimation. + T eps_initial_rank_estimation = + 2 * std::pow(std::numeric_limits::epsilon(), 0.95); + // Variables for a posteriori rank estimation. + int64_t new_rank; + T running_max, running_min, curr_entry; + + T* A_hat = (T*) calloc(d * n, sizeof(T)); + T* tau = (T*) calloc(n, sizeof(T)); + // Buffer for column pivoting. + std::vector J_buf(n, 0); + + /* RandBLAS style + /// Generating a SASO + RandBLAS::SparseDist DS = {.n_rows = d, .n_cols = m, .vec_nnz = this->nnz}; + RandBLAS::SparseSkOp S(DS, state); + state = RandBLAS::fill_sparse(S); + + /// Applying a SASO + RandBLAS::sketch_general( + Layout::ColMajor, Op::NoTrans, Op::NoTrans, + d, n, m, 1.0, S, 0, 0, A, lda, 0.0, A_hat, d + ); + */ + + /// SparseBLAS style + /// Our sketching operators must be in COO format. + // auto&& [values, rowptr, colind, shape, _] = spblas::generate_coo(d, m, + // nnz); + /// TODO: add COO view + // spblas::coo_view s(values, rowptr, colind, shape, nnz); + + /// Perform dense sketching for the sake of tests passing. + /// This is not the intended behavior for this algorithm, as dense sketching + /// tanks the performance of CQRRPT. + auto [buf, a_shape] = spblas::generate_gaussian(d, m); + spblas::__mdspan::mdspan s(buf.data(), d, m); + + spblas::__mdspan::mdspan a(A, m, n); + spblas::__mdspan::mdspan ahat(A_hat, d, n); + + spblas::multiply(s, a, ahat); + A_hat = ahat.data_handle(); + + /// Performing QRCP on a sketch + lapack::geqp3(d, n, A_hat, d, J, tau); + + /// Using naive rank estimation to ensure that R used for preconditioning is + /// invertible. The actual rank estimate k will be computed a posteriori. + /// Using R[i,i] to approximate the i-th singular value of A_hat. + /// Truncate at the largest i where R[i,i] / R[0,0] >= eps. 
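Stepping outside the implementation for a moment, a minimal usage sketch of the `call()` interface documented above; it mirrors the randnla test added later in this diff. The template argument, sizes, tolerance, and sketching parameters are illustrative, and `A` is assumed to already hold the column-major input matrix.

```cpp
int64_t m = 50, n = 20;
std::vector<double> A(m * n);        // column-major input; overwritten with Q
std::vector<double> R(n * n, 0.0);   // receives the upper-triangular R factor
std::vector<int64_t> J(n, 0);        // receives the pivot indices
double tol = std::pow(std::numeric_limits<double>::epsilon(), 0.85);
int64_t nnz = 2;                     // sketching-operator sparsity parameter
double d_factor = 1.25;              // embedding dimension d = d_factor * n

miniapps::CQRRPT<double> cqrrpt(tol, nnz);
cqrrpt.call(m, n, A.data(), /*lda=*/m, R.data(), /*ldr=*/n, J.data(), d_factor);
int64_t k = cqrrpt.rank;             // numerical rank estimated a posteriori
```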
+ for (i = 0; i < n; ++i) { + if (std::abs(A_hat[i * d + i]) / std::abs(A_hat[0]) < + eps_initial_rank_estimation) { + k = i; + break; + } + } + this->rank = k; + + // Allocating space for a preconditioner buffer. + T* R_sp = (T*) calloc(k * k, sizeof(T)); + /// Extracting a k by k upper-triangular R. + lapack::lacpy(MatrixType::Upper, k, k, A_hat, d, R_sp, k); + /// Extracting a k by n R representation (k by k upper-triangular, rest - + /// general) + lapack::lacpy(MatrixType::Upper, k, k, A_hat, d, R, ldr); + lapack::lacpy(MatrixType::General, k, n - k, &A_hat[d * k], d, &R[n * k], + ldr); + + // Swap k columns of A with pivots from J + blas::copy(n, J, 1, J_buf.data(), 1); + util::col_swap(m, n, k, A, lda, J_buf); + + // A_pre * R_sp = AP + blas::trsm(Layout::ColMajor, Side::Right, Uplo::Upper, Op::NoTrans, + Diag::NonUnit, m, k, 1.0, R_sp, k, A, lda); + + // Do Cholesky QR + blas::syrk(Layout::ColMajor, Uplo::Upper, Op::Trans, k, m, 1.0, A, lda, 0.0, + R_sp, k); + lapack::potrf(Uplo::Upper, k, R_sp, k); + + // Re-estimate rank after we have the R-factor form Cholesky QR. + // The strategy here is the same as in naive rank estimation. + // This also automatically takes care of any potentical failures in Cholesky + // factorization. Note that the diagonal of R_sp may not be sorted, so we need + // to keep the running max/min We expect the loss in the orthogonality of Q to + // be approximately equal to u * cond(R_sp)^2, where u is the unit roundoff + // for the numerical type T. + new_rank = k; + running_max = R_sp[0]; + running_min = R_sp[0]; + + for (i = 0; i < k; ++i) { + curr_entry = std::abs(R_sp[i * k + i]); + running_max = std::max(running_max, curr_entry); + running_min = std::min(running_min, curr_entry); + if (running_max / running_min >= + std::sqrt(this->eps / std::numeric_limits::epsilon())) { + new_rank = i - 1; + break; + } + } + + blas::trsm(Layout::ColMajor, Side::Right, Uplo::Upper, Op::NoTrans, + Diag::NonUnit, m, new_rank, 1.0, R_sp, k, A, lda); + // Get the final R-factor. + blas::trmm(Layout::ColMajor, Side::Left, Uplo::Upper, Op::NoTrans, + Diag::NonUnit, new_rank, n, 1.0, R_sp, k, R, ldr); + + // Set the rank parameter to the value comuted a posteriori. + this->rank = k; + + free(A_hat); + free(R_sp); + free(tau); + + return 0; +} +} // end namespace miniapps +#endif diff --git a/miniapps/include/krylov.hh b/miniapps/include/krylov.hh new file mode 100644 index 0000000..e24f3a5 --- /dev/null +++ b/miniapps/include/krylov.hh @@ -0,0 +1,40 @@ +#ifndef MINIAPPS_KRYLOV_HH +#define MINIAPPS_KRYLOV_HH + +#include + +#include + +namespace miniapps { + +/** + * A very simple Krylov Solver interface for convenience. 
+ * + * @tparam T the value type + */ +template +class Krylov { +public: + Krylov(T eps, int max_iters) : eps{eps}, max_iters{max_iters} {} + + virtual ~Krylov() {} + + virtual std::tuple apply(const spblas::csr_view a, + const std::vector b, + std::vector x) = 0; + + T get_eps() { + return eps; + } + + int get_max_iters() { + return max_iters; + } + + const T eps; + const int max_iters; +}; + +} // namespace miniapps + +#endif // MINIAPPS_KRYLOV_HH diff --git a/miniapps/include/matrix_utils.hh b/miniapps/include/matrix_utils.hh new file mode 100644 index 0000000..50adaa2 --- /dev/null +++ b/miniapps/include/matrix_utils.hh @@ -0,0 +1,151 @@ +#ifndef SPBLAS_MINIAPPS_MATRIX_UTILS_HH +#define SPBLAS_MINIAPPS_MATRIX_UTILS_HH + +#include + +#include +#include +#include +#include + +namespace miniapps { + +template +struct matrix_data_entry { + std::size_t row; + std::size_t col; + T value; +}; + +template +struct matrix_data { + using nonzero_type = matrix_data_entry; + + I num_rows; + I num_cols; + std::vector nonzeros; + + matrix_data(std::vector rowind, std::vector colind, + std::vector values, spblas::index shape) + : num_rows(shape[0]), num_cols(shape[1]) { + assert(rowind.size() == colind.size() && rowind.size() == values.size()); + for (std::size_t i = 0; i < values.size(); i++) { + nonzeros.emplace_back(rowind[i], colind[i], values[i]); + } + } + + auto convert_to_coo() { + auto nnz = nonzeros.size(); + std::vector values(nnz); + std::vector rowind(nnz); + std::vector colind(nnz); + + for (std::size_t i = 0; i < nnz; i++) { + values[i] = nonzeros[i].value; + rowind[i] = nonzeros[i].row; + colind[i] = nonzeros[i].col; + } + return std::tuple(values, rowind, colind, nnz); + } + + void sort_row_major() { + std::sort(begin(nonzeros), end(nonzeros), + [](nonzero_type x, nonzero_type y) { + return std::tie(x.row, x.col) < std::tie(y.row, y.col); + }); + } + + void sum_duplicates() { + this->sort_row_major(); + std::vector new_nonzeros; + if (!nonzeros.empty()) { + new_nonzeros.emplace_back(nonzeros.front().row, nonzeros.front().col, + 0.0); + for (auto entry : nonzeros) { + if (entry.row != new_nonzeros.back().row || + entry.col != new_nonzeros.back().col) { + new_nonzeros.emplace_back(entry.row, entry.col, 0.0); + } + new_nonzeros.back().value += entry.value; + } + nonzeros = std::move(new_nonzeros); + } + } + + void make_symmetric() { + const auto nnz = nonzeros.size(); + // compute (A + op(A^T)) / 2 + for (std::size_t i = 0; i < nnz; i++) { + nonzeros[i].value /= 2.0; + auto entry = nonzeros[i]; + nonzeros.emplace_back(entry.col, entry.row, entry.value); + } + // combine duplicates + this->sum_duplicates(); + } + + void make_diag_dominant(T ratio = 1.0) { + std::vector norms(num_rows); + std::vector diag_positions(num_rows, -1); + std::int64_t i{}; + for (auto entry : nonzeros) { + if (entry.row == entry.col) { + diag_positions[entry.row] = i; + } else { + norms[entry.row] += std::abs(entry.value); + } + i++; + } + for (i = 0; i < num_rows; i++) { + if (norms[i] == 0.0) { + // make sure empty rows don't make the matrix singular + norms[i] = 1.0; + } + if (diag_positions[i] < 0) { + nonzeros.emplace_back(i, i, norms[i] * ratio); + } else { + auto& diag_value = nonzeros[diag_positions[i]].value; + const auto diag_magnitude = std::abs(diag_value); + const auto offdiag_magnitude = norms[i]; + if (diag_magnitude < offdiag_magnitude * ratio) { + const auto scaled_value = + diag_value * (offdiag_magnitude * ratio / diag_magnitude); + if (std::isfinite(scaled_value)) { + diag_value = 
scaled_value; + } else { + diag_value = offdiag_magnitude * ratio; + } + } + } + } + this->sort_row_major(); + } +}; + +template +auto convert_rowind_to_rowptr(std::vector rowind, std::size_t nnz, + spblas::index shape) { + auto num_rows = shape[0]; + std::vector rowptr(num_rows + 1, 0); + assert(rowind.size() == nnz); + + for (I i = 0; i < nnz; i++) { + rowptr[rowind[i]]++; + } + constexpr auto max = std::numeric_limits::max(); + std::size_t partial_sum{}; + for (std::size_t i = 0; i < num_rows + 1; ++i) { + auto this_nnz = i < num_rows ? rowptr[i] : 0; + rowptr[i] = partial_sum; + if (max - partial_sum < this_nnz) { + throw std::exception(); + } + partial_sum += this_nnz; + } + + return rowptr; +} + +} // namespace miniapps + +#endif // SPBLAS_MINIAPPS_MATRIX_UTILS_HH diff --git a/miniapps/include/miniapps.hh b/miniapps/include/miniapps.hh new file mode 100644 index 0000000..291c427 --- /dev/null +++ b/miniapps/include/miniapps.hh @@ -0,0 +1,19 @@ +#ifndef SPBLAS_MINIAPPS_HH +#define SPBLAS_MINIAPPS_HH + +// misc +#include "util.hh" + +// misc +#include "matrix_utils.hh" + +// randnla +#include "cqrrpt.hh" + +// cg +#include "cg.hh" + +// bicgstab +#include "bicgstab.hh" + +#endif // SPBLAS_MINIAPPS_HH diff --git a/miniapps/include/rl_blaspp.hh b/miniapps/include/rl_blaspp.hh new file mode 100644 index 0000000..1ac915e --- /dev/null +++ b/miniapps/include/rl_blaspp.hh @@ -0,0 +1,12 @@ +#ifndef randlapack_blaspp_h +#define randlapack_blaspp_h + +#include + +using Layout = blas::Layout; +using Op = blas::Op; +using Side = blas::Side; +using Diag = blas::Diag; +using Uplo = blas::Uplo; + +#endif diff --git a/miniapps/include/rl_config.hh.in b/miniapps/include/rl_config.hh.in new file mode 100644 index 0000000..68e2991 --- /dev/null +++ b/miniapps/include/rl_config.hh.in @@ -0,0 +1,11 @@ +#ifndef miniapps_config_h +#define miniapps_config_h + +#define miniapps_VERSION "@miniapps_VERSION@" +#define miniapps_VERSION_MAJOR @miniapps_VERSION_MAJOR@ +#define miniapps_VERSION_MINOR @miniapps_VERSION_MINOR@ +#define miniapps_VERSION_PATCH @miniapps_VERSION_PATCH@ + +#CMAKEDEFINE miniapps_HAS_OpenMP + +#endif diff --git a/miniapps/include/rl_lapackpp.hh b/miniapps/include/rl_lapackpp.hh new file mode 100644 index 0000000..ba62260 --- /dev/null +++ b/miniapps/include/rl_lapackpp.hh @@ -0,0 +1,12 @@ +#ifndef randlapack_lapackpp_h +#define randlapack_lapackpp_h + +#define LAPACK_COMPLEX_CPP 1 + +#include + +using Job = lapack::Job; +using MatrixType = lapack::MatrixType; +using Norm = lapack::Norm; + +#endif diff --git a/miniapps/include/util.hh b/miniapps/include/util.hh new file mode 100644 index 0000000..0066136 --- /dev/null +++ b/miniapps/include/util.hh @@ -0,0 +1,36 @@ +#ifndef miniapps_util_h +#define miniapps_util_h + +#include "rl_blaspp.hh" +#include "rl_lapackpp.hh" +#include + +#include +#include +#include +#include +#include + +namespace miniapps::util { + +/// A version of the above function to be used on a vector of integers +template +void col_swap(int64_t m, int64_t n, int64_t k, T* A, int64_t lda, + std::vector idx) { + if (k > n) + throw std::runtime_error("Invalid rank parameter."); + + int64_t i, j; //, l; + for (i = 0, j = 0; i < k; ++i) { + j = idx[i] - 1; + blas::swap(m, &A[i * lda], 1, &A[j * lda], 1); + + // swap idx array elements + // Find idx element with value i and assign it to j + auto it = std::find(idx.begin() + i, idx.begin() + k, i + 1); + idx[it - (idx.begin())] = j + 1; + } +} + +} // namespace miniapps::util +#endif diff --git a/miniapps/test/CMakeLists.txt 
b/miniapps/test/CMakeLists.txt new file mode 100644 index 0000000..733220a --- /dev/null +++ b/miniapps/test/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(randnla) +add_subdirectory(solvers) diff --git a/miniapps/test/randnla/CMakeLists.txt b/miniapps/test/randnla/CMakeLists.txt new file mode 100644 index 0000000..8696f44 --- /dev/null +++ b/miniapps/test/randnla/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(miniapps_randnla_tests + test_cqrrpt.cc) + +include(GoogleTest) +target_link_libraries(miniapps_randnla_tests miniapps GTest::gtest_main) + +gtest_discover_tests(miniapps_randnla_tests) diff --git a/miniapps/test/randnla/test_cqrrpt.cc b/miniapps/test/randnla/test_cqrrpt.cc new file mode 100644 index 0000000..676a256 --- /dev/null +++ b/miniapps/test/randnla/test_cqrrpt.cc @@ -0,0 +1,142 @@ +#include "miniapps.hh" +#include "rl_blaspp.hh" +#include "rl_lapackpp.hh" +#include + +#include +#include + +class TestCQRRPT : public ::testing::Test { +protected: + virtual void SetUp(){}; + + virtual void TearDown(){}; + + template + struct CQRRPTTestData { + int64_t row; + int64_t col; + int64_t rank; + std::vector A; + std::vector R; + std::vector J; + std::vector A_cpy1; + std::vector A_cpy2; + std::vector I_ref; + + CQRRPTTestData(int64_t m, int64_t n, int64_t k) + : A(m * n, 0.0), R(n * n, 0.0), J(n, 0), A_cpy1(m * n, 0.0), + A_cpy2(m * n, 0.0), I_ref(k * k, 0.0) { + row = m; + col = n; + rank = k; + } + }; + + template + static void norm_and_copy_computational_helper(T& norm_A, + CQRRPTTestData& all_data) { + auto m = all_data.row; + auto n = all_data.col; + + lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, + all_data.A_cpy1.data(), m); + lapack::lacpy(MatrixType::General, m, n, all_data.A.data(), m, + all_data.A_cpy2.data(), m); + norm_A = lapack::lange(Norm::Fro, m, n, all_data.A.data(), m); + } + + template + static void error_check(T& norm_A, CQRRPTTestData& all_data) { + + auto m = all_data.row; + auto n = all_data.col; + auto k = all_data.rank; + + all_data.I_ref.resize(k * k, 0.0); + for (int i = 0; i < k; ++i) + all_data.I_ref[(k + 1) * i] = 1.0; + + T* A_dat = all_data.A_cpy1.data(); + T const* A_cpy_dat = all_data.A_cpy2.data(); + T const* Q_dat = all_data.A.data(); + T const* R_dat = all_data.R.data(); + T* I_ref_dat = all_data.I_ref.data(); + + // Check orthogonality of Q + // Q' * Q - I = 0 + blas::syrk(Layout::ColMajor, Uplo::Upper, Op::Trans, k, m, 1.0, Q_dat, m, + -1.0, I_ref_dat, k); + T norm_QTQ = lapack::lansy(lapack::Norm::Fro, Uplo::Upper, k, I_ref_dat, k); + + // A - QR + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, n, k, 1.0, Q_dat, + m, R_dat, n, -1.0, A_dat, m); + + // Implementing max col norm metric + T max_col_norm = 0.0; + T col_norm = 0.0; + int max_idx = 0; + for (int i = 0; i < n; ++i) { + col_norm = blas::nrm2(m, &A_dat[m * i], 1); + if (max_col_norm < col_norm) { + max_col_norm = col_norm; + max_idx = i; + } + } + T col_norm_A = blas::nrm2(n, &A_cpy_dat[m * max_idx], 1); + T norm_AQR = lapack::lange(Norm::Fro, m, n, A_dat, m); + + printf("REL NORM OF AP - QR: %15e\n", norm_AQR / norm_A); + printf("MAX COL NORM METRIC: %15e\n", max_col_norm / col_norm_A); + printf("FRO NORM OF (Q'Q - I): %2e\n\n", norm_QTQ); + + T atol = std::pow(std::numeric_limits::epsilon(), 0.75); + ASSERT_NEAR(norm_AQR / norm_A, 0.0, atol); + ASSERT_NEAR(max_col_norm / col_norm_A, 0.0, atol); + ASSERT_NEAR(norm_QTQ, 0.0, atol); + } + + /// General test for CQRRPT: + /// Computes QR factorzation, and computes A[:, J] - QR. 
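For reference, the three acceptance checks performed by `error_check` above, written out informally (with `atol = eps^0.75` and `P` the column permutation given by the pivot vector `J`). These restate the code and are not part of the patch.

```cpp
//   ||A P - Q R||_F / ||A||_F                   <= atol   (factorization residual)
//   max_i ||(A P - Q R)(:, i)||_2 / col_norm_A  <= atol   (worst-column metric,
//                                                          relative to that column of A P)
//   ||Q^T Q - I_k||_F                           <= atol   (orthogonality of Q)
```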
+ template + static void test_CQRRPT_general(T d_factor, T norm_A, + CQRRPTTestData& all_data, + alg_type& CQRRPT) { + + auto m = all_data.row; + auto n = all_data.col; + + CQRRPT.call(m, n, all_data.A.data(), m, all_data.R.data(), n, + all_data.J.data(), d_factor); + all_data.rank = CQRRPT.rank; + + printf("RANK AS RETURNED BY CQRRPT %ld\n", all_data.rank); + + miniapps::util::col_swap(m, n, n, all_data.A_cpy1.data(), m, all_data.J); + miniapps::util::col_swap(m, n, n, all_data.A_cpy2.data(), m, all_data.J); + + error_check(norm_A, all_data); + } +}; + +TEST_F(TestCQRRPT, CQRRPT_full_rank_no_hqrrp) { + int64_t m = 50; + int64_t n = 20; + int64_t k = 20; + double d_factor = 1.25; + double norm_A = 0; + int64_t nnz = 2; + double tol = std::pow(std::numeric_limits::epsilon(), 0.85); + + CQRRPTTestData all_data(m, n, k); + miniapps::CQRRPT CQRRPT(tol, nnz); + // Generate dense matrix + auto [buf, a_shape] = spblas::generate_dense(m, n); + + lapack::lacpy(MatrixType::General, m, n, buf.data(), m, all_data.A.data(), m); + + norm_and_copy_computational_helper(norm_A, all_data); + test_CQRRPT_general>(d_factor, norm_A, + all_data, CQRRPT); +} diff --git a/miniapps/test/solvers/CMakeLists.txt b/miniapps/test/solvers/CMakeLists.txt new file mode 100644 index 0000000..70a99d3 --- /dev/null +++ b/miniapps/test/solvers/CMakeLists.txt @@ -0,0 +1,8 @@ +add_executable(miniapps_solvers_tests + test_cg.cpp + test_bicgstab.cpp) + +target_link_libraries(miniapps_solvers_tests miniapps GTest::gtest_main) + +include(GoogleTest) +gtest_discover_tests(miniapps_solvers_tests) diff --git a/miniapps/test/solvers/test_bicgstab.cpp b/miniapps/test/solvers/test_bicgstab.cpp new file mode 100644 index 0000000..999702c --- /dev/null +++ b/miniapps/test/solvers/test_bicgstab.cpp @@ -0,0 +1,73 @@ +#include "miniapps.hh" +#include + +#include +#include +#include + +#include + +template +class TestBiCGSTAB : public testing::Test { +public: + auto generate_problem(std::size_t m, std::size_t n, std::size_t nnz_input, + std::size_t seed = 0) { + auto&& [values_orig, rowind_orig, colind_orig, shape, nnz_orig] = + spblas::generate_coo(m, n, nnz_input, seed); + miniapps::matrix_data data(rowind_orig, colind_orig, values_orig, shape); + auto&& [values, rowind, colind, nnz] = data.convert_to_coo(); + auto rowptr = miniapps::convert_rowind_to_rowptr(colind, nnz, shape); + return std::tuple(values, rowptr, colind, shape, nnz); + } + + auto generate_spd_problem(std::size_t m, std::size_t n, std::size_t nnz_input, + std::size_t seed = 0) { + auto&& [values_orig, rowind_orig, colind_orig, shape, nnz_orig] = + spblas::generate_coo(m, n, nnz_input, seed); + miniapps::matrix_data data(rowind_orig, colind_orig, values_orig, shape); + data.sort_row_major(); + data.make_symmetric(); + data.make_diag_dominant(); + auto&& [values, rowind, colind, nnz] = data.convert_to_coo(); + auto rowptr = miniapps::convert_rowind_to_rowptr(colind, nnz, shape); + return std::tuple(values, rowptr, colind, shape, nnz); + } +}; + +using BiCGSTABTestTypes = ::testing::Types; +TYPED_TEST_SUITE(TestBiCGSTAB, BiCGSTABTestTypes); + +TYPED_TEST(TestBiCGSTAB, ConvergesForSmallSystem) { + using T = TypeParam; + constexpr double tol = std::is_same::value ? 
1e-14 : 1e-7; + constexpr int max_iters = 100; + std::tuple res; + auto&& [values, rowptr, colind, shape, nnz] = + this->generate_problem(10, 10, 42, 75); + spblas::csr_view a(values, rowptr, colind, shape, nnz); + std::vector b(10, 1.0); + std::vector x(10, 0.0); + + miniapps::BiCGSTAB BiCGSTAB(tol, max_iters); + auto [error, iters] = BiCGSTAB.apply(a, b, x); + + ASSERT_LE(error, tol); + ASSERT_LE(iters, max_iters); +} + +TYPED_TEST(TestBiCGSTAB, ConvergesForLargeSystem) { + using T = TypeParam; + constexpr double tol = std::is_same::value ? 1e-14 : 1e-7; + constexpr int max_iters = 100; + std::vector b(1000, 1.0); + std::vector x(1000, 0.0); + auto&& [values, rowptr, colind, shape, nnz] = + this->generate_spd_problem(1000, 1000, 12345, 75); + spblas::csr_view a(values, rowptr, colind, shape, nnz); + + miniapps::BiCGSTAB BiCGSTAB(tol, max_iters); + auto [error, iters] = BiCGSTAB.apply(a, b, x); + + ASSERT_LE(error, tol); + ASSERT_LE(iters, max_iters); +} diff --git a/miniapps/test/solvers/test_cg.cpp b/miniapps/test/solvers/test_cg.cpp new file mode 100644 index 0000000..0e159ca --- /dev/null +++ b/miniapps/test/solvers/test_cg.cpp @@ -0,0 +1,66 @@ +#include "miniapps.hh" +#include + +#include +#include +#include + +#include + +template +class TestCG : public testing::Test { +public: + using ValueType = T; + + auto generate_spd_problem(std::size_t m, std::size_t n, std::size_t nnz_input, + std::size_t seed = 0) { + auto&& [values_orig, rowind_orig, colind_orig, shape, nnz_orig] = + spblas::generate_coo(m, n, nnz_input, seed); + miniapps::matrix_data data(rowind_orig, colind_orig, values_orig, shape); + data.sort_row_major(); + data.make_symmetric(); + data.make_diag_dominant(); + auto&& [values, rowind, colind, nnz] = data.convert_to_coo(); + auto rowptr = miniapps::convert_rowind_to_rowptr(colind, nnz, shape); + return std::tuple(values, rowptr, colind, shape, nnz); + } +}; + +using CGTestTypes = ::testing::Types; +TYPED_TEST_SUITE(TestCG, CGTestTypes); + +TYPED_TEST(TestCG, ConvergesForSmallSystem) { + using T = TypeParam; + constexpr double tol = std::is_same::value ? 1e-14 : 1e-7; + constexpr int max_iters = 100; + std::vector b(10, 1.0); + std::vector x(10, 0.0); + + auto&& [values, rowptr, colind, shape, nnz] = + this->generate_spd_problem(10, 10, 42, 75); + spblas::csr_view a(values, rowptr, colind, shape, nnz); + + miniapps::CG CG(tol, max_iters); + auto [error, iters] = CG.apply(a, b, x); + + ASSERT_LE(error, tol); + ASSERT_LE(iters, max_iters); +} + +TYPED_TEST(TestCG, ConvergesForLargeSystem) { + using T = TypeParam; + constexpr double tol = std::is_same::value ? 1e-14 : 1e-7; + constexpr int max_iters = 100; + std::vector b(1000, 1.0); + std::vector x(1000, 0.0); + + auto&& [values, rowptr, colind, shape, nnz] = + this->generate_spd_problem(1000, 1000, 12345, 75); + spblas::csr_view a(values, rowptr, colind, shape, nnz); + + miniapps::CG CG(tol, max_iters); + auto [error, iters] = CG.apply(a, b, x); + + ASSERT_LE(error, tol); + ASSERT_LE(iters, max_iters); +}
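Finally, a small worked example of the `rowind` to `rowptr` conversion from `matrix_utils.hh` that these tests rely on; the numbers are illustrative only.

```cpp
// convert_rowind_to_rowptr counts nonzeros per row and takes an exclusive
// prefix sum.  For a 4-row matrix with nnz = 4:
//   rowind         = {0, 0, 1, 3}
//   per-row counts = {2, 1, 0, 1}
//   rowptr         = {0, 2, 3, 3, 4}   // rowptr[i+1] - rowptr[i] = nnz in row i
```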