From 1e19e3bab29cd887bddcd6f6b5090d655b775546 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 21 Aug 2025 17:52:59 +0200 Subject: [PATCH] simple operation like scale, norm on csr --- include/spblas/vendor/cusparse/cusparse.hpp | 1 + include/spblas/vendor/cusparse/exception.hpp | 34 +++++ include/spblas/vendor/cusparse/simple_op.hpp | 152 +++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 include/spblas/vendor/cusparse/simple_op.hpp diff --git a/include/spblas/vendor/cusparse/cusparse.hpp b/include/spblas/vendor/cusparse/cusparse.hpp index 7caa698..4bc3c95 100644 --- a/include/spblas/vendor/cusparse/cusparse.hpp +++ b/include/spblas/vendor/cusparse/cusparse.hpp @@ -1,3 +1,4 @@ #pragma once #include "multiply.hpp" +#include "simple_op.hpp" diff --git a/include/spblas/vendor/cusparse/exception.hpp b/include/spblas/vendor/cusparse/exception.hpp index d90ac30..370da8a 100644 --- a/include/spblas/vendor/cusparse/exception.hpp +++ b/include/spblas/vendor/cusparse/exception.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -57,6 +58,39 @@ void throw_if_error(cusparseStatus_t error_code) { } } +// Throw an exception if the cublasStatus_t is not CUBLAS_STATUS_SUCCESS. 
+void throw_if_error(cublasStatus_t error_code) { + if (error_code == CUBLAS_STATUS_SUCCESS) { + return; + } else if (error_code == CUBLAS_STATUS_NOT_INITIALIZED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_INITIALIZED\""); + } else if (error_code == CUBLAS_STATUS_ALLOC_FAILED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_ALLOC_FAILED\""); + } else if (error_code == CUBLAS_STATUS_INVALID_VALUE) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_INVALID_VALUE\""); + } else if (error_code == CUBLAS_STATUS_ARCH_MISMATCH) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_ARCH_MISMATCH\""); + } else if (error_code == CUBLAS_STATUS_MAPPING_ERROR) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_MAPPING_ERROR\""); + } else if (error_code == CUBLAS_STATUS_EXECUTION_FAILED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_EXECUTION_FAILED\""); + } else if (error_code == CUBLAS_STATUS_INTERNAL_ERROR) { + throw std::runtime_error("cuBLAS encountered an error: " + "\"CUBLAS_STATUS_INTERNAL_ERROR\""); + } else if (error_code == CUBLAS_STATUS_NOT_SUPPORTED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_SUPPORTED\""); + } else { + throw std::runtime_error("cuBLAS encountered an error: \"unknown error\""); + } +} + } // namespace __cusparse } // namespace spblas diff --git a/include/spblas/vendor/cusparse/simple_op.hpp b/include/spblas/vendor/cusparse/simple_op.hpp new file mode 100644 index 0000000..73e6fbe --- /dev/null +++ b/include/spblas/vendor/cusparse/simple_op.hpp @@ -0,0 +1,152 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +#include "cuda_allocator.hpp" +#include "detail/cusparse_tensors.hpp" +#include "exception.hpp" +#include "types.hpp" + +namespace spblas { + +class simple_operation_state_t { +public: + 
simple_operation_state_t() + : simple_operation_state_t(cusparse::cuda_allocator{}) {} + + simple_operation_state_t(cusparse::cuda_allocator alloc) + : alloc_(alloc) { + cublasHandle_t handle; + __cusparse::throw_if_error(cublasCreate(&handle)); + if (auto stream = alloc.stream()) { + __cusparse::throw_if_error(cublasSetStream(handle, stream)); + } + handle_ = handle_manager(handle, [](cublasHandle_t handle) { + __cusparse::throw_if_error(cublasDestroy(handle)); + }); + } + + simple_operation_state_t(cusparse::cuda_allocator alloc, + cublasHandle_t handle) + : alloc_(alloc) { + handle_ = handle_manager(handle, [](cublasHandle_t handle) { + // it is provided by user, we do not delete it at all. + }); + } + + template + requires __detail::has_csr_base + void scale(typename std::remove_reference_t::scalar_type val, A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasSscal(handle_.get(), static_cast(a_base.values().size()), + &val, a_base.values().data(), 1)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasDscal(handle_.get(), static_cast(a_base.values().size()), + &val, a_base.values().data(), 1)); + } else { + throw std::runtime_error("not implemented"); + } + } + + template + requires __detail::has_csr_base + typename std::remove_reference_t::scalar_type matrix_inf_norm(A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + using index_type = typename matrix_type::index_type; + value_type result = 0; + // very slow implementation by calling cublas row by row + for (int i = 0; i < __backend::shape(a_base)[0]; i++) { + value_type tmp = 0; + index_type start, end; + __cusparse::throw_if_error(cudaMemcpy(&start, a_base.rowptr().data() + i, + sizeof(index_type), + 
cudaMemcpyDeviceToHost)); + __cusparse::throw_if_error( + cudaMemcpy(&end, a_base.rowptr().data() + i + 1, sizeof(index_type), + cudaMemcpyDeviceToHost)); + if constexpr (std::is_same_v) { + __cusparse::throw_if_error(cublasSasum(handle_.get(), end - start, + a_base.values().data() + start, + 1, &tmp)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error(cublasDasum(handle_.get(), end - start, + a_base.values().data() + start, + 1, &tmp)); + } else { + throw std::runtime_error("not implemented"); + } + result = std::max(result, tmp); + } + return result; + } + + template + requires __detail::has_csr_base + typename std::remove_reference_t::scalar_type matrix_frob_norm(A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + value_type result(0.0); + if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasSnrm2(handle_.get(), static_cast(a_base.values().size()), + a_base.values().data(), 1, &result)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasDnrm2(handle_.get(), static_cast(a_base.values().size()), + a_base.values().data(), 1, &result)); + } else { + throw std::runtime_error("not implemented"); + } + return result; + } + +private: + using handle_manager = + std::unique_ptr::element_type, + std::function>; + handle_manager handle_; + cusparse::cuda_allocator alloc_; +}; + +using scale_state_t = simple_operation_state_t; +using matrix_inf_norm_state_t = simple_operation_state_t; +using matrix_frob_norm_state_t = simple_operation_state_t; + +template + requires __detail::has_csr_base +void scale(scale_state_t& state, + typename std::remove_reference_t::scalar_type val, A&& a) { + state.scale(val, a); +} + +template + requires __detail::has_csr_base +typename std::remove_reference_t::scalar_type +matrix_inf_norm(matrix_inf_norm_state_t& state, A&& a) { + return state.matrix_inf_norm(a); +} + +template + 
// Frobenius-norm front-end dispatching to the state object.
// NOTE(review): the template-parameter list was stripped in the patch
// text; `template <typename A>` and the `<A>` constraint argument are
// reconstructed from the sibling `scale`/`matrix_inf_norm` overloads —
// confirm against the original patch.
template <typename A>
  requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type
matrix_frob_norm(matrix_frob_norm_state_t& state, A&& a) {
  return state.matrix_frob_norm(a);
}

} // namespace spblas