From 1e19e3bab29cd887bddcd6f6b5090d655b775546 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Thu, 21 Aug 2025 17:52:59 +0200 Subject: [PATCH] simple operation like scale, norm on csr --- include/spblas/vendor/cusparse/cusparse.hpp | 1 + include/spblas/vendor/cusparse/exception.hpp | 34 +++++ include/spblas/vendor/cusparse/simple_op.hpp | 152 +++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 include/spblas/vendor/cusparse/simple_op.hpp diff --git a/include/spblas/vendor/cusparse/cusparse.hpp b/include/spblas/vendor/cusparse/cusparse.hpp index 7caa698..4bc3c95 100644 --- a/include/spblas/vendor/cusparse/cusparse.hpp +++ b/include/spblas/vendor/cusparse/cusparse.hpp @@ -1,3 +1,4 @@ #pragma once #include "multiply.hpp" +#include "simple_op.hpp" diff --git a/include/spblas/vendor/cusparse/exception.hpp b/include/spblas/vendor/cusparse/exception.hpp index d90ac30..370da8a 100644 --- a/include/spblas/vendor/cusparse/exception.hpp +++ b/include/spblas/vendor/cusparse/exception.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -57,6 +58,39 @@ void throw_if_error(cusparseStatus_t error_code) { } } +// Throw an exception if the cublasStatus_t is not CUBLAS_STATUS_SUCCESS. 
+void throw_if_error(cublasStatus_t error_code) { + if (error_code == CUBLAS_STATUS_SUCCESS) { + return; + } else if (error_code == CUBLAS_STATUS_NOT_INITIALIZED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_INITIALIZED\""); + } else if (error_code == CUBLAS_STATUS_ALLOC_FAILED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_ALLOC_FAILED\""); + } else if (error_code == CUBLAS_STATUS_INVALID_VALUE) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_INVALID_VALUE\""); + } else if (error_code == CUBLAS_STATUS_ARCH_MISMATCH) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_ARCH_MISMATCH\""); + } else if (error_code == CUBLAS_STATUS_MAPPING_ERROR) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_MAPPING_ERROR\""); + } else if (error_code == CUBLAS_STATUS_EXECUTION_FAILED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_EXECUTION_FAILED\""); + } else if (error_code == CUBLAS_STATUS_INTERNAL_ERROR) { + throw std::runtime_error("cuBLAS encountered an error: " + "\"CUBLAS_STATUS_INTERNAL_ERROR\""); + } else if (error_code == CUBLAS_STATUS_NOT_SUPPORTED) { + throw std::runtime_error( + "cuBLAS encountered an error: \"CUBLAS_STATUS_NOT_SUPPORTED\""); + } else { + throw std::runtime_error("cuBLAS encountered an error: \"unknown error\""); + } +} + } // namespace __cusparse } // namespace spblas diff --git a/include/spblas/vendor/cusparse/simple_op.hpp b/include/spblas/vendor/cusparse/simple_op.hpp new file mode 100644 index 0000000..73e6fbe --- /dev/null +++ b/include/spblas/vendor/cusparse/simple_op.hpp @@ -0,0 +1,152 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +#include "cuda_allocator.hpp" +#include "detail/cusparse_tensors.hpp" +#include "exception.hpp" +#include "types.hpp" + +namespace spblas { + +class simple_operation_state_t { +public: + 
simple_operation_state_t() + : simple_operation_state_t(cusparse::cuda_allocator{}) {} + + simple_operation_state_t(cusparse::cuda_allocator alloc) + : alloc_(alloc) { + cublasHandle_t handle; + __cusparse::throw_if_error(cublasCreate(&handle)); + if (auto stream = alloc.stream()) { + __cusparse::throw_if_error(cublasSetStream(handle, stream)); + } + handle_ = handle_manager(handle, [](cublasHandle_t handle) { + __cusparse::throw_if_error(cublasDestroy(handle)); + }); + } + + simple_operation_state_t(cusparse::cuda_allocator alloc, + cublasHandle_t handle) + : alloc_(alloc) { + handle_ = handle_manager(handle, [](cublasHandle_t handle) { + // it is provided by user, we do not delete it at all. + }); + } + + template + requires __detail::has_csr_base + void scale(typename std::remove_reference_t::scalar_type val, A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasSscal(handle_.get(), static_cast(a_base.values().size()), + &val, a_base.values().data(), 1)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasDscal(handle_.get(), static_cast(a_base.values().size()), + &val, a_base.values().data(), 1)); + } else { + throw std::runtime_error("not implemented"); + } + } + + template + requires __detail::has_csr_base + typename std::remove_reference_t::scalar_type matrix_inf_norm(A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + using index_type = typename matrix_type::index_type; + value_type result = 0; + // very slow implementation by calling cublas row by row + for (int i = 0; i < __backend::shape(a_base)[0]; i++) { + value_type tmp = 0; + index_type start, end; + __cusparse::throw_if_error(cudaMemcpy(&start, a_base.rowptr().data() + i, + sizeof(index_type), + 
cudaMemcpyDeviceToHost)); + __cusparse::throw_if_error( + cudaMemcpy(&end, a_base.rowptr().data() + i + 1, sizeof(index_type), + cudaMemcpyDeviceToHost)); + if constexpr (std::is_same_v) { + __cusparse::throw_if_error(cublasSasum(handle_.get(), end - start, + a_base.values().data() + start, + 1, &tmp)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error(cublasDasum(handle_.get(), end - start, + a_base.values().data() + start, + 1, &tmp)); + } else { + throw std::runtime_error("not implemented"); + } + result = std::max(result, tmp); + } + return result; + } + + template + requires __detail::has_csr_base + typename std::remove_reference_t::scalar_type matrix_frob_norm(A&& a) { + auto a_base = __detail::get_ultimate_base(a); + using matrix_type = decltype(a_base); + using value_type = typename matrix_type::scalar_type; + value_type result(0.0); + if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasSnrm2(handle_.get(), static_cast(a_base.values().size()), + a_base.values().data(), 1, &result)); + } else if constexpr (std::is_same_v) { + __cusparse::throw_if_error( + cublasDnrm2(handle_.get(), static_cast(a_base.values().size()), + a_base.values().data(), 1, &result)); + } else { + throw std::runtime_error("not implemented"); + } + return result; + } + +private: + using handle_manager = + std::unique_ptr::element_type, + std::function>; + handle_manager handle_; + cusparse::cuda_allocator alloc_; +}; + +using scale_state_t = simple_operation_state_t; +using matrix_inf_norm_state_t = simple_operation_state_t; +using matrix_frob_norm_state_t = simple_operation_state_t; + +template + requires __detail::has_csr_base +void scale(scale_state_t& state, + typename std::remove_reference_t::scalar_type val, A&& a) { + state.scale(val, a); +} + +template + requires __detail::has_csr_base +typename std::remove_reference_t::scalar_type +matrix_inf_norm(matrix_inf_norm_state_t& state, A&& a) { + return state.matrix_inf_norm(a); +} + +template + 
// Frobenius-norm front-end dispatching to the state object.
// NOTE(review): the template-parameter list was stripped in the patch
// text; `template <typename A>` and the `<A>` constraint argument are
// reconstructed from the sibling `scale`/`matrix_inf_norm` overloads —
// confirm against the original patch.
template <typename A>
  requires __detail::has_csr_base<A>
typename std::remove_reference_t<A>::scalar_type
matrix_frob_norm(matrix_frob_norm_state_t& state, A&& a) {
  return state.matrix_frob_norm(a);
}

} // namespace spblas