From 36623b8352caa50e9283ffc11ede678eef555787 Mon Sep 17 00:00:00 2001 From: zhangnju Date: Wed, 28 May 2025 00:25:14 -0500 Subject: [PATCH 1/4] fix the issue of hipblas matmul --- csrc/ops.hip | 159 +++++++++++++++++++++++++++++++++++++++++++++-- csrc/ops_hip.cuh | 18 +++++- 2 files changed, 168 insertions(+), 9 deletions(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 4dcbbecfd..a6dd72692 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -509,6 +509,7 @@ static std::string hipError_to_string(const hipError_t ret) } } + template int igemmlt(hipblasLtHandle_t ltHandle, int m, int n, int k, const int8_t *A, const int8_t *B, void *C, float *row_scale, int lda, int ldb, int ldc) { #ifdef NO_HIPBLASLT @@ -524,28 +525,101 @@ template int igemmlt(hipblasLtHandl hipblasLtOrder_t col_ampere = HIPBLASLT_ORDER_COL; has_error |= checkHipblasStatus(hipblasLtMatrixLayoutCreate(&Adesc, HIP_R_8I, m, k, lda)); + if(has_error != 0) + { + std::cout<<"failed to run hipblasLtMatrixLayoutCreate for Adesc:"< int igemmlt(hipblasLtHandl } else { - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + void* d_workspace=nullptr; + uint64_t workspace_size = 0; + for(int i = 0; i < returnedAlgoCount; i++) + workspace_size = max(workspace_size, heuristicResult[i].workspaceSize); + hipMalloc(&d_workspace, workspace_size); + + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, &heuristicResult[0].algo, d_workspace, workspace_size, 0)); + hipFree(d_workspace); + if(has_error != 0) + { + std::cout<<"failed to run hipblasLtMatmul"< int igemmlt(hipblasLtHandl } else { + uint64_t workspace_size = 0; + for(int i = 0; i < returnedAlgoCount; i++) + workspace_size = max(workspace_size, heuristicResult[i].workspaceSize); + void* d_workspace=nullptr; + hipMalloc(&d_workspace, workspace_size); if(!SCALE_ROWS) { float alpha = 1.0f, beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); } else { float beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, 0, 0)); + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); + + } + hipFree(d_workspace); + if(has_error != 0) + { + std::cout<<"failed to run hipblasLtMatmul with int8"< int igemmlt(hipblasLtHandl #endif // NO_HIPBLASLT } + int fill_up_to_nearest_multiple(int value, int multiple) { return value + (value % multiple == 0 ? 0 : (multiple - (value % multiple))); diff --git a/csrc/ops_hip.cuh b/csrc/ops_hip.cuh index e57cbb3b5..2bf749863 100644 --- a/csrc/ops_hip.cuh +++ b/csrc/ops_hip.cuh @@ -101,17 +101,28 @@ typedef enum Funcs_t class Context { public: - rocblas_handle m_handle; + hipblasLtHandle_t m_handle; + //rocblas_handle m_handle; Context() { - rocblas_handle handle; - rocblas_create_handle(&handle); + //rocblas_handle handle; + //rocblas_create_handle(&handle); + hipblasLtHandle_t handle; + hipblasLtCreate(&handle); m_handle = handle; + } + ~Context() + { + hipblasLtDestroy(m_handle); + + } + }; +/* class ContextLt { public: @@ -124,6 +135,7 @@ class ContextLt m_handle = handle; } }; +*/ class ContextHipsparse { From 234e1aea7549817cfe5cdbb2a90c124908bbf9bb Mon Sep 17 00:00:00 2001 From: zhangnju Date: Wed, 28 May 2025 04:40:15 -0500 Subject: [PATCH 2/4] update the patch of fixing hipblaslt matmul issue --- csrc/ops.hip | 6 ++---- csrc/ops_hip.cuh | 8 -------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index a6dd72692..9b2f1f05e 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -658,8 +658,7 @@ template int igemmlt(hipblasLtHandl uint64_t workspace_size = 0; for(int i = 0; i < returnedAlgoCount; i++) workspace_size = max(workspace_size, heuristicResult[i].workspaceSize); - hipMalloc(&d_workspace, workspace_size); - + hipMalloc(&d_workspace, workspace_size); has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, &heuristicResult[0].algo, d_workspace, workspace_size, 0)); hipFree(d_workspace); if(has_error != 0) @@ -767,8 +766,7 @@ template int igemmlt(hipblasLtHandl { float beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); - + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); } hipFree(d_workspace); if(has_error != 0) diff --git a/csrc/ops_hip.cuh b/csrc/ops_hip.cuh index 2bf749863..49e02ade6 100644 --- a/csrc/ops_hip.cuh +++ b/csrc/ops_hip.cuh @@ -111,15 +111,7 @@ class Context hipblasLtHandle_t handle; hipblasLtCreate(&handle); m_handle = handle; - } - - ~Context() - { - hipblasLtDestroy(m_handle); - - } - }; /* From 5fe10e887166964dba159238d4292a6ce24849a5 Mon Sep 17 00:00:00 2001 From: zhangnju Date: Wed, 28 May 2025 04:54:21 -0500 Subject: [PATCH 3/4] update the patch of fixing hipblaslt matmul issue --- csrc/ops.hip | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index 9b2f1f05e..d9145e691 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -658,7 +658,7 @@ template int igemmlt(hipblasLtHandl uint64_t workspace_size = 0; for(int i = 0; i < returnedAlgoCount; i++) workspace_size = max(workspace_size, heuristicResult[i].workspaceSize); - hipMalloc(&d_workspace, workspace_size); + hipMalloc(&d_workspace, workspace_size); has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int32_t*)C, Cdesc, (int32_t*)C, Cdesc, &heuristicResult[0].algo, d_workspace, workspace_size, 0)); hipFree(d_workspace); if(has_error != 0) @@ -759,13 +759,11 @@ template int igemmlt(hipblasLtHandl if(!SCALE_ROWS) { float alpha = 1.0f, beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc,&alpha, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); } else { float beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); } hipFree(d_workspace); From e3238fe1ccb632962061bb325e419dc693c2507f Mon Sep 17 00:00:00 2001 From: zhangnju Date: Wed, 28 May 2025 04:58:51 -0500 Subject: [PATCH 4/4] update the patch of fixing hipblaslt matmul issue --- csrc/ops.hip | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/ops.hip b/csrc/ops.hip index d9145e691..972209cfa 100644 --- a/csrc/ops.hip +++ b/csrc/ops.hip @@ -764,7 +764,7 @@ template int igemmlt(hipblasLtHandl else { float beta = 0.0f; - has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); + has_error |= checkHipblasStatus(hipblasLtMatmul(ltHandle, matmulDesc, row_scale, A, Adesc, B, Bdesc, &beta, (int8_t*)C, Cdesc, (int8_t*)C, Cdesc, &heuristicResult[0].algo, nullptr, workspace_size, 0)); } hipFree(d_workspace); if(has_error != 0)