From 8d944568623ebbdfd4239d5be0e2b48a34a47a82 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Wed, 12 Nov 2025 17:40:58 -0500 Subject: [PATCH 01/47] Initial CPU implementation of scalAddI. Doesn't yet reuse sparsity pattern. Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandler.cpp | 25 +++++ resolve/matrix/MatrixHandler.hpp | 2 + resolve/matrix/MatrixHandlerCpu.cpp | 103 ++++++++++++++++++++ resolve/matrix/MatrixHandlerCpu.hpp | 2 + resolve/matrix/MatrixHandlerCuda.cpp | 15 +++ resolve/matrix/MatrixHandlerCuda.hpp | 2 + resolve/matrix/MatrixHandlerHip.cpp | 14 +++ resolve/matrix/MatrixHandlerHip.hpp | 5 +- resolve/matrix/MatrixHandlerImpl.hpp | 2 + tests/unit/matrix/MatrixHandlerTests.hpp | 74 +++++++++++++- tests/unit/matrix/runMatrixHandlerTests.cpp | 2 + 11 files changed, 243 insertions(+), 3 deletions(-) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 1d570af85..0a0699c5d 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -351,6 +351,31 @@ namespace ReSolve } } + /** + * @brief Multiply csr matrix by a constant and add I. + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - scalar parameter + * @param[in] memspace - Device where the operation is computed + * @return 0 if successful, 1 otherwise + */ + int MatrixHandler::scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace) + { + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix is assumed to be in CSR format.\n"); + assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); + assert(A->getNumRows() == A->getNumColumns() && "Matrix must be square.\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->scaleAddI(A, alpha); + break; + case DEVICE: + return devImpl_->scaleAddI(A, alpha); + break; + } + return 1; + } + /** * @brief If CUDA support is enabled in the handler. * diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 6b6785aba..312d02e68 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -62,6 +62,8 @@ namespace ReSolve void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); + int scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace); + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped int matvec(matrix::Sparse* A, vector_type* vec_x, diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index eecd48fbe..28561c835 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -389,4 +389,107 @@ namespace ReSolve } return 0; } + + /** + * @brief Multiply all nonzero values of a csr matrix by a constant + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + static int scaleConst(matrix::Sparse* A, real_type alpha) + { + real_type* values = A->getValues(memory::HOST); + const index_type nnz = A->getNnz(); + for (index_type i = 0; i < nnz; ++i) + { + values[i] *= alpha; + } + return 0; + } + + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + { + scaleConst(A, alpha); + + auto new_row_pointers = new index_type[A->getNumRows() + 1]; + // At most we add one element per row/column + auto new_col_indices = new index_type[A->getNnz() + A->getNumRows()]; + auto new_values = new real_type[A->getNnz() + A->getNumRows()]; + + index_type const* const original_row_pointers = A->getRowData(memory::HOST); + index_type const* const original_col_indices = A->getColData(memory::HOST); + real_type const* const original_values = A->getValues(memory::HOST); + + index_type new_nnz_count = 0; + for (index_type i = 0; i < A->getNumRows(); ++i) + { + new_row_pointers[i] = new_nnz_count; + const index_type original_row_start = original_row_pointers[i]; + const index_type original_row_end = original_row_pointers[i + 1]; + + bool diagonal_added = false; + for (index_type j = original_row_start; j < original_row_end; ++j) + { + if (original_col_indices[j] == i) + { + // Diagonal element found in original matrix + new_values[new_nnz_count] = original_values[j] + 1.0; + new_col_indices[new_nnz_count] = i; + new_nnz_count++; + diagonal_added = true; + } + else if (original_col_indices[j] < i && !diagonal_added) + { + // Handle elements before diagonal + new_values[new_nnz_count] = original_values[j]; + new_col_indices[new_nnz_count] = original_col_indices[j]; + new_nnz_count++; + } + else if (original_col_indices[j] > i && !diagonal_added) + { + // Insert diagonal if not found yet + new_values[new_nnz_count] = 1.; + new_col_indices[new_nnz_count] = i; + new_nnz_count++; + diagonal_added = true; // Mark as added to prevent re-insertion + // Then add the current original element + new_values[new_nnz_count] = original_values[j]; + new_col_indices[new_nnz_count] = original_col_indices[j]; + new_nnz_count++; + } + else + { + // Elements after diagonal or diagonal already handled + new_values[new_nnz_count] = original_values[j]; + new_col_indices[new_nnz_count] = original_col_indices[j]; + new_nnz_count++; + } + } + + // If diagonal element was not present in original row + if (!diagonal_added) + { + new_values[new_nnz_count] = 1.; + new_col_indices[new_nnz_count] = i; + new_nnz_count++; + } + } + new_row_pointers[A->getNumRows()] = new_nnz_count; + + A->destroyMatrixData(memory::HOST); + A->setNnz(new_nnz_count); + A->setDataPointers(new_row_pointers, new_col_indices, new_values, memory::HOST); + A->setUpdated(memory::HOST); + + return 0; + } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 66b810787..77a4b717c 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -46,6 +46,8 @@ namespace ReSolve int addConst(matrix::Sparse* A, real_type alpha) override; + int scaleAddI(matrix::Csr* A, real_type alpha) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 7b3108438..b56cb8fe0 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -400,4 +400,19 @@ namespace ReSolve cuda::addConst(nnz, alpha, values); return 0; } + + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + { + // NOT IMPLEMENTED + return 1; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index f0efbcee8..b85c39faf 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -45,6 +45,8 @@ namespace ReSolve int rightScale(matrix::Csr* A, vector_type* diag) override; + int scaleAddI(matrix::Csr* A, real_type alpha) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 17a31bc3d..a7e3de020 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -369,4 +369,18 @@ namespace ReSolve return 0; } + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + { + // NOT IMPLEMENTED + return 1; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 7e69c3f1c..8f14cc42f 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -43,7 +43,10 @@ namespace ReSolve int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int addConst(matrix::Sparse* A, real_type alpha) override; + + int scaleAddI(matrix::Csr* A, real_type alpha) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index d874dff16..771ab4839 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -46,6 +46,8 @@ namespace ReSolve virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; + virtual int scaleAddI(matrix::Csr* A, real_type alpha) = 0; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index e6fb0eac9..79c564dd7 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -202,6 +202,24 @@ namespace ReSolve return status.report(testname.c_str()); } + TestOutcome scaleAddI(index_type n) + { + TestStatus status; + std::string testname(__func__); + matrix::Csr* A = createCsrMatrix(n); + real_type val = 2.; + + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + status *= verifyScaleAddICsrMatrix(A, val); + + delete A; + return status.report(testname.c_str()); + } + private: ReSolve::MatrixHandler& handler_; memory::MemorySpace memspace_{memory::HOST}; @@ -402,8 +420,8 @@ namespace ReSolve * @brief Verify structure of a CSR matrix with preset pattern. * * The sparsity structure corresponds to the CSR representation of a rectangular matrix - * created by createRectangularCscMatrix. - * The sparisty structure is upper bidiagonal if n==m, + * created by createRectangularCsrMatrix. + * The sparsity structure is upper bidiagonal if n==m, * with an extra entry in the first column. * If n>m A_{ij} is nonzero iff i==j, or i+m==j+n * if ngetNumRows(); + const index_type m = A->getNumColumns(); + + if(n != m) + { + return false; + } + + index_type* scaled_row_ptr = A->getRowData(memory::HOST); + index_type* scaled_col_idx = A->getColData(memory::HOST); + real_type* scaled_value = A->getValues(memory::HOST); + + const real_type expected = 30. * scale + 1.; + + // Verify values - each element scaled by scale. Diagonal elements should be 1. + for (index_type i = 0; i < n; ++i) + { + real_type sum = 0.; + for (index_type j = scaled_row_ptr[i]; j < scaled_row_ptr[i + 1]; ++j) + { + sum += scaled_value[j]; + } + if (sum != expected) + { + std::cout << "Mismatch at row " << i << ": scaled sum = " << sum << ", expected sum = " << expected << "\n"; + return false; + } + } + + return true; + } + /** * @brief Verify structure of a CSR matrix with preset pattern that is right-scaled * diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 84e9a6011..4e9cbbc90 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -76,6 +76,8 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result result += test.rightScale(1024, 2048); workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddI(100); std::cout << "\n"; } From e8878b8483f010c9fe70ab76fff4703120ba3600 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 13 Nov 2025 21:04:53 -0500 Subject: [PATCH 02/47] cache sparsity pattern Signed-off-by: Steven Hahn --- resolve/matrix/CMakeLists.txt | 2 +- resolve/matrix/MatrixHandlerCpu.cpp | 92 ++++++++++++++++++++++++ resolve/workspace/LinAlgWorkspaceCpu.cpp | 28 +++++++- resolve/workspace/LinAlgWorkspaceCpu.hpp | 20 ++++++ tests/unit/matrix/MatrixHandlerTests.hpp | 1 - 5 files changed, 139 insertions(+), 4 deletions(-) diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 9098d8c07..39d02f423 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -30,7 +30,7 @@ set(Matrix_HEADER_INSTALL io.hpp Sparse.hpp Coo.hpp Csr.hpp Csc.hpp # Build shared library ReSolve::matrix add_library(resolve_matrix SHARED ${Matrix_SRC}) -target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector) +target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector resolve_workspace) # Link to CUDA ReSolve backend if CUDA is support enabled if(RESOLVE_USE_CUDA) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 28561c835..99ffd0275 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ReSolve { @@ -408,6 +409,84 @@ namespace ReSolve return 0; } + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + static int scaleAddII(matrix::Csr* A, real_type alpha, ScaleAddIBuffer* pattern) + { + scaleConst(A, alpha); + + auto new_row_pointers = new index_type[A->getNumRows() + 1]; + std::copy(pattern->row_data_.begin(), pattern->row_data_.end(), new_row_pointers); + auto new_col_indices = new index_type[pattern->nnz]; + std::copy(pattern->col_data_.begin(), pattern->col_data_.end(), new_col_indices); + auto new_values = new real_type[pattern->nnz]; + + index_type const* const original_row_pointers = A->getRowData(memory::HOST); + index_type const* const original_col_indices = A->getColData(memory::HOST); + real_type const* const original_values = A->getValues(memory::HOST); + + index_type new_nnz_count = 0; + for (index_type i = 0; i < A->getNumRows(); ++i) + { + const index_type original_row_start = original_row_pointers[i]; + const index_type original_row_end = original_row_pointers[i + 1]; + + bool diagonal_added = false; + for (index_type j = original_row_start; j < original_row_end; ++j) + { + if (original_col_indices[j] == i) + { + // Diagonal element found in original matrix + new_values[new_nnz_count] = original_values[j] + 1.0; + new_nnz_count++; + diagonal_added = true; + } + else if (original_col_indices[j] < i && !diagonal_added) + { + // Handle elements before diagonal + new_values[new_nnz_count] = original_values[j]; + new_nnz_count++; + } + else if (original_col_indices[j] > i && !diagonal_added) + { + // Insert diagonal if not found yet + new_values[new_nnz_count] = 1.; + new_nnz_count++; + diagonal_added = true; // Mark as added to prevent re-insertion + // Then add the current original element + new_values[new_nnz_count] = original_values[j]; + new_nnz_count++; + } + else + { + // Elements after diagonal or diagonal already handled + new_values[new_nnz_count] = original_values[j]; + new_nnz_count++; + } + } + + // If diagonal element was not present in original row + if (!diagonal_added) + { + new_values[new_nnz_count] = 1.; + new_nnz_count++; + } + } + + A->destroyMatrixData(memory::HOST); + A->setNnz(new_nnz_count); + A->setDataPointers(new_row_pointers, new_col_indices, new_values, memory::HOST); + A->setUpdated(memory::HOST); + + return 0; + } + /** * @brief Add a constant to the nonzero values of a csr matrix, * then add the identity matrix. @@ -418,6 +497,11 @@ namespace ReSolve */ int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) { + if (workspace_->scaleAddISetup()) + { + ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); + return scaleAddII(A, alpha, pattern); + } scaleConst(A, alpha); auto new_row_pointers = new index_type[A->getNumRows() + 1]; @@ -490,6 +574,14 @@ namespace ReSolve A->setDataPointers(new_row_pointers, new_col_indices, new_values, memory::HOST); A->setUpdated(memory::HOST); + auto sparsity = new ScaleAddIBuffer; + sparsity->row_data_.resize(A->getNumRows() + 1); + std::copy_n(A->getRowData(memory::HOST), A->getNumRows() + 1, sparsity->row_data_.begin()); + sparsity->col_data_.resize(A->getNnz()); + std::copy_n(A->getColData(memory::HOST), A->getNnz(), sparsity->col_data_.begin()); + sparsity->nnz = A->getNnz(); + workspace_->setScaleAddIBuffer(sparsity); + return 0; } } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 36f9d6cf0..e3708aa23 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -1,5 +1,6 @@ #include "LinAlgWorkspaceCpu.hpp" +#include #include namespace ReSolve @@ -10,6 +11,7 @@ namespace ReSolve LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() { + delete scaleaddi_buffer_; } void LinAlgWorkspaceCpu::initializeHandles() @@ -18,7 +20,29 @@ namespace ReSolve void LinAlgWorkspaceCpu::resetLinAlgWorkspace() { - // No resources to reset in CPU workspace - return; + delete scaleaddi_buffer_; + scaleaddi_buffer_ = nullptr; } + + bool LinAlgWorkspaceCpu::scaleAddISetup() + { + return scaleaddi_setup_done_; + } + + void LinAlgWorkspaceCpu::scaleAddISetupDone() + { + scaleaddi_setup_done_ = true; + } + + ScaleAddIBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() + { + return scaleaddi_buffer_; + } + + void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddIBuffer* buffer) + { + assert(scaleaddi_buffer_ == nullptr); + scaleaddi_buffer_ = buffer; + } + } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 308c2497c..a1677dfad 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -1,7 +1,18 @@ #pragma once +#include + +#include + namespace ReSolve { + struct ScaleAddIBuffer + { + std::vector row_data_; ///< row data (HOST) + std::vector col_data_; ///< column data (HOST) + index_type nnz; + }; + class LinAlgWorkspaceCpu { public: @@ -9,6 +20,15 @@ namespace ReSolve ~LinAlgWorkspaceCpu(); void initializeHandles(); void resetLinAlgWorkspace(); + bool scaleAddISetup(); + void scaleAddISetupDone(); + ScaleAddIBuffer* getScaleAddIBuffer(); + void setScaleAddIBuffer(ScaleAddIBuffer* buffer); + + private: + // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. + bool scaleaddi_setup_done_{false}; + ScaleAddIBuffer* scaleaddi_buffer_{nullptr}; }; } // namespace ReSolve diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 79c564dd7..80f0fbd78 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -725,7 +725,6 @@ namespace ReSolve } index_type* scaled_row_ptr = A->getRowData(memory::HOST); - index_type* scaled_col_idx = A->getColData(memory::HOST); real_type* scaled_value = A->getValues(memory::HOST); const real_type expected = 30. * scale + 1.; From de25a65622a6589b9b42f2de3adae92398d76df9 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 10:07:52 -0500 Subject: [PATCH 03/47] unnecessary branch Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 99ffd0275..f2637ac74 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -447,12 +447,6 @@ namespace ReSolve new_nnz_count++; diagonal_added = true; } - else if (original_col_indices[j] < i && !diagonal_added) - { - // Handle elements before diagonal - new_values[new_nnz_count] = original_values[j]; - new_nnz_count++; - } else if (original_col_indices[j] > i && !diagonal_added) { // Insert diagonal if not found yet @@ -465,7 +459,8 @@ namespace ReSolve } else { - // Elements after diagonal or diagonal already handled + // Elements before diagonal, elements after diagonal and the + // diagonal is already handled new_values[new_nnz_count] = original_values[j]; new_nnz_count++; } @@ -531,13 +526,6 @@ namespace ReSolve new_nnz_count++; diagonal_added = true; } - else if (original_col_indices[j] < i && !diagonal_added) - { - // Handle elements before diagonal - new_values[new_nnz_count] = original_values[j]; - new_col_indices[new_nnz_count] = original_col_indices[j]; - new_nnz_count++; - } else if (original_col_indices[j] > i && !diagonal_added) { // Insert diagonal if not found yet @@ -552,7 +540,8 @@ namespace ReSolve } else { - // Elements after diagonal or diagonal already handled + // Elements before diagonal, elements after diagonal and the + // diagonal is already handled new_values[new_nnz_count] = original_values[j]; new_col_indices[new_nnz_count] = original_col_indices[j]; new_nnz_count++; From 5c4f9030a7e3141469953e1b511e0f627e437de4 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 14:40:55 -0500 Subject: [PATCH 04/47] memory leak! Signed-off-by: Steven Hahn --- resolve/matrix/CMakeLists.txt | 2 +- resolve/matrix/MatrixHandlerCpu.cpp | 65 ++++++++++++++--------- resolve/workspace/LinAlgWorkspaceCpu.cpp | 67 ++++++++++++++++++++++++ resolve/workspace/LinAlgWorkspaceCpu.hpp | 16 ++++-- 4 files changed, 119 insertions(+), 31 deletions(-) diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 39d02f423..9098d8c07 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -30,7 +30,7 @@ set(Matrix_HEADER_INSTALL io.hpp Sparse.hpp Coo.hpp Csr.hpp Csc.hpp # Build shared library ReSolve::matrix add_library(resolve_matrix SHARED ${Matrix_SRC}) -target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector resolve_workspace) +target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector) # Link to CUDA ReSolve backend if CUDA is support enabled if(RESOLVE_USE_CUDA) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index f2637ac74..0dcf80d53 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -392,9 +392,9 @@ namespace ReSolve } /** - * @brief Multiply all nonzero values of a csr matrix by a constant + * @brief Multiply all nonzero values of a sparse matrix by a constant * - * @param[in,out] A - Sparse CSR matrix + * @param[in,out] A - Sparse matrix * @param[in] alpha - constant to the added * @return 0 if successful, 1 otherwise */ @@ -410,22 +410,41 @@ namespace ReSolve } /** - * @brief Add a constant to the nonzero values of a csr matrix, - * then add the identity matrix. + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static void updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) + { + A->destroyMatrixData(memory::HOST); + A->setNnz(nnz); + A->setDataPointers(row_data, col_data, val_data, memory::HOST); + A->setUpdated(memory::HOST); + } + + /** + * @brief Multiply a csr matrix by a constant, + * then add the identity matrix. * * @param[in,out] A - Sparse CSR matrix * @param[in] alpha - constant to the added + * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ static int scaleAddII(matrix::Csr* A, real_type alpha, ScaleAddIBuffer* pattern) { scaleConst(A, alpha); - auto new_row_pointers = new index_type[A->getNumRows() + 1]; - std::copy(pattern->row_data_.begin(), pattern->row_data_.end(), new_row_pointers); - auto new_col_indices = new index_type[pattern->nnz]; - std::copy(pattern->col_data_.begin(), pattern->col_data_.end(), new_col_indices); - auto new_values = new real_type[pattern->nnz]; + auto new_row_pointers = new index_type[pattern->getNumRows() + 1]; + pattern->getRowData(new_row_pointers, pattern->getNumRows() + 1); + auto new_col_indices = new index_type[pattern->getNnz()]; + pattern->getColumnData(new_col_indices, pattern->getNnz()); + auto new_values = new real_type[pattern->getNnz()]; index_type const* const original_row_pointers = A->getRowData(memory::HOST); index_type const* const original_col_indices = A->getColData(memory::HOST); @@ -474,10 +493,8 @@ namespace ReSolve } } - A->destroyMatrixData(memory::HOST); - A->setNnz(new_nnz_count); - A->setDataPointers(new_row_pointers, new_col_indices, new_values, memory::HOST); - A->setUpdated(memory::HOST); + assert(new_nnz_count == pattern->getNnz()); + updateMatrix(A, new_row_pointers, new_col_indices, new_values, new_nnz_count); return 0; } @@ -492,17 +509,20 @@ namespace ReSolve */ int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) { + // Reuse sparsity pattern if it is available if (workspace_->scaleAddISetup()) { ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); return scaleAddII(A, alpha, pattern); } + scaleConst(A, alpha); auto new_row_pointers = new index_type[A->getNumRows() + 1]; // At most we add one element per row/column - auto new_col_indices = new index_type[A->getNnz() + A->getNumRows()]; - auto new_values = new real_type[A->getNnz() + A->getNumRows()]; + index_type max_nnz_count = A->getNnz() + A->getNumRows(); + auto new_col_indices = new index_type[max_nnz_count]; + auto new_values = new real_type[max_nnz_count]; index_type const* const original_row_pointers = A->getRowData(memory::HOST); index_type const* const original_col_indices = A->getColData(memory::HOST); @@ -557,19 +577,12 @@ namespace ReSolve } } new_row_pointers[A->getNumRows()] = new_nnz_count; + assert(new_nnz_count <= max_nnz_count); - A->destroyMatrixData(memory::HOST); - A->setNnz(new_nnz_count); - A->setDataPointers(new_row_pointers, new_col_indices, new_values, memory::HOST); - A->setUpdated(memory::HOST); + updateMatrix(A, new_row_pointers, new_col_indices, new_values, new_nnz_count); - auto sparsity = new ScaleAddIBuffer; - sparsity->row_data_.resize(A->getNumRows() + 1); - std::copy_n(A->getRowData(memory::HOST), A->getNumRows() + 1, sparsity->row_data_.begin()); - sparsity->col_data_.resize(A->getNnz()); - std::copy_n(A->getColData(memory::HOST), A->getNnz(), sparsity->col_data_.begin()); - sparsity->nnz = A->getNnz(); - workspace_->setScaleAddIBuffer(sparsity); + auto sparsity_pattern = new ScaleAddIBuffer(A->getRowData(memory::HOST), A->getNumRows() + 1, A->getColData(memory::HOST), A->getNnz()); + workspace_->setScaleAddIBuffer(sparsity_pattern); return 0; } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index e3708aa23..c65b4a867 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -1,10 +1,76 @@ #include "LinAlgWorkspaceCpu.hpp" +#include #include #include namespace ReSolve { + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) + * @param[in] nrows - number of rows + * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] nnz - number of non-zeros + */ + ScaleAddIBuffer::ScaleAddIBuffer(index_type* row_data, index_type nrows, index_type* col_data, index_type nnz) + : row_data_(row_data, row_data + nrows + 1), col_data_(col_data, col_data + nnz) + { + } + + void ScaleAddIBuffer::setRowData(index_type* row_data, index_type array_size) + { + assert(static_cast(array_size) == row_data_.size()); + std::copy(row_data_.begin(), row_data_.end(), row_data); + } + + void ScaleAddIBuffer::setColumnData(index_type* col_data, index_type array_size) + { + assert(static_cast(array_size) == col_data_.size()); + std::copy(col_data_.begin(), col_data_.end(), col_data); + } + + index_type* ScaleAddIBuffer::getRowData() + { + return row_data_.data(); + } + + index_type* ScaleAddIBuffer::getColumnData() + { + return col_data_.data(); + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddIBuffer::getNumRows() + { + return static_cast(row_data_.size()) - 1; + } + + /** + * @brief get number of matrix columns + * + * @return number of matrix columns. + */ + index_type ScaleAddIBuffer::getNumColumns() + { + return getNumRows(); + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddIBuffer::getNnz() + { + return static_cast(col_data_.size()); + } + LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() { } @@ -36,6 +102,7 @@ namespace ReSolve ScaleAddIBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() { + assert(ScaleAddIBuffer != nullptr); return scaleaddi_buffer_; } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index a1677dfad..bd5ada632 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -6,11 +6,19 @@ namespace ReSolve { - struct ScaleAddIBuffer + class ScaleAddIBuffer { - std::vector row_data_; ///< row data (HOST) - std::vector col_data_; ///< column data (HOST) - index_type nnz; + public: + ScaleAddIBuffer(index_type* row_data, index_type nrows, index_type* col_data, index_type nnz); + void getRowData(index_type* row_data, index_type array_size); + void getColumnData(index_type* col_data, index_type array_size); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); + + private: + std::vector row_data_; + std::vector col_data_; }; class LinAlgWorkspaceCpu From 473146f3330baaca7373451367cf0c4918609bc2 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 14:54:19 -0500 Subject: [PATCH 05/47] fix memory leak? Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 18 ++++++++---------- resolve/workspace/LinAlgWorkspaceCpu.cpp | 14 +------------- resolve/workspace/LinAlgWorkspaceCpu.hpp | 4 ++-- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 0dcf80d53..4616ebdab 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -419,12 +419,10 @@ namespace ReSolve * @param[in] nnz - number of non-zer * @return 0 if successful, 1 otherwise */ - static void updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) + static int updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) { A->destroyMatrixData(memory::HOST); - A->setNnz(nnz); - A->setDataPointers(row_data, col_data, val_data, memory::HOST); - A->setUpdated(memory::HOST); + return A->copyDataFrom(row_data, col_data, val_data, nnz, memory::HOST, memory::HOST); } /** @@ -440,10 +438,6 @@ namespace ReSolve { scaleConst(A, alpha); - auto new_row_pointers = new index_type[pattern->getNumRows() + 1]; - pattern->getRowData(new_row_pointers, pattern->getNumRows() + 1); - auto new_col_indices = new index_type[pattern->getNnz()]; - pattern->getColumnData(new_col_indices, pattern->getNnz()); auto new_values = new real_type[pattern->getNnz()]; index_type const* const original_row_pointers = A->getRowData(memory::HOST); @@ -494,8 +488,8 @@ namespace ReSolve } assert(new_nnz_count == pattern->getNnz()); - updateMatrix(A, new_row_pointers, new_col_indices, new_values, new_nnz_count); - + updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); + delete[] new_values; return 0; } @@ -584,6 +578,10 @@ namespace ReSolve auto sparsity_pattern = new ScaleAddIBuffer(A->getRowData(memory::HOST), A->getNumRows() + 1, A->getColData(memory::HOST), A->getNnz()); workspace_->setScaleAddIBuffer(sparsity_pattern); + delete[] new_row_pointers; + delete[] new_col_indices; + delete[] new_values; + return 0; } } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index c65b4a867..27cb85d74 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -19,18 +19,6 @@ namespace ReSolve { } - void ScaleAddIBuffer::setRowData(index_type* row_data, index_type array_size) - { - assert(static_cast(array_size) == row_data_.size()); - std::copy(row_data_.begin(), row_data_.end(), row_data); - } - - void ScaleAddIBuffer::setColumnData(index_type* col_data, index_type array_size) - { - assert(static_cast(array_size) == col_data_.size()); - std::copy(col_data_.begin(), col_data_.end(), col_data); - } - index_type* ScaleAddIBuffer::getRowData() { return row_data_.data(); @@ -102,7 +90,7 @@ namespace ReSolve ScaleAddIBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() { - assert(ScaleAddIBuffer != nullptr); + assert(scaleaddi_buffer_ != nullptr); return scaleaddi_buffer_; } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index bd5ada632..c020285cf 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -10,8 +10,8 @@ namespace ReSolve { public: ScaleAddIBuffer(index_type* row_data, index_type nrows, index_type* col_data, index_type nnz); - void getRowData(index_type* row_data, index_type array_size); - void getColumnData(index_type* col_data, index_type array_size); + index_type* getRowData(); + index_type* getColumnData(); index_type getNumRows(); index_type getNumColumns(); index_type getNnz(); From 56092da18179d0dfd5fe01c7f3ecab17fb0d00c5 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 14:58:35 -0500 Subject: [PATCH 06/47] update function name Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 4616ebdab..970511267 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -434,7 +434,7 @@ namespace ReSolve * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ - static int scaleAddII(matrix::Csr* A, real_type alpha, ScaleAddIBuffer* pattern) + static int scaleAddIWithPattern(matrix::Csr* A, real_type alpha, ScaleAddIBuffer* pattern) { scaleConst(A, alpha); @@ -507,7 +507,7 @@ namespace ReSolve if (workspace_->scaleAddISetup()) { ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); - return scaleAddII(A, alpha, pattern); + return scaleAddIWithPattern(A, alpha, pattern); } scaleConst(A, alpha); From 25eedad46b4c324cf3510a97ed1ccf7919e0403a Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 16:18:05 -0500 Subject: [PATCH 07/47] cleanup Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 44 ++++++++++++------------ resolve/workspace/LinAlgWorkspaceCpu.cpp | 4 +-- resolve/workspace/LinAlgWorkspaceCpu.hpp | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 970511267..9b713fcde 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -421,23 +421,22 @@ namespace ReSolve */ static int updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) { - A->destroyMatrixData(memory::HOST); + if (A->destroyMatrixData(memory::HOST) == -1) + { + return 1; + } return A->copyDataFrom(row_data, col_data, val_data, nnz, memory::HOST, memory::HOST); } /** - * @brief Multiply a csr matrix by a constant, - * then add the identity matrix. + * @brief Add the identity to a CSR matrix. * * @param[in,out] A - Sparse CSR matrix - * @param[in] alpha - constant to the added * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ - static int scaleAddIWithPattern(matrix::Csr* A, real_type alpha, ScaleAddIBuffer* pattern) + static int addIWithPattern(matrix::Csr* A, ScaleAddIBuffer* pattern) { - scaleConst(A, alpha); - auto new_values = new real_type[pattern->getNnz()]; index_type const* const original_row_pointers = A->getRowData(memory::HOST); @@ -488,9 +487,9 @@ namespace ReSolve } assert(new_nnz_count == pattern->getNnz()); - updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); + index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); delete[] new_values; - return 0; + return info; } /** @@ -503,20 +502,23 @@ namespace ReSolve */ int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) { + if (index_type info = scaleConst(A, alpha), info == 1) + { + return info; + } + // Reuse sparsity pattern if it is available if (workspace_->scaleAddISetup()) { ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); - return scaleAddIWithPattern(A, alpha, pattern); + return addIWithPattern(A, pattern); } - scaleConst(A, alpha); - - auto new_row_pointers = new index_type[A->getNumRows() + 1]; + std::vector new_row_pointers(A->getNumRows() + 1); // At most we add one element per row/column index_type max_nnz_count = A->getNnz() + A->getNumRows(); - auto new_col_indices = new index_type[max_nnz_count]; - auto new_values = new real_type[max_nnz_count]; + std::vector new_col_indices(max_nnz_count); + auto new_values = new real_type[max_nnz_count]; index_type const* const original_row_pointers = A->getRowData(memory::HOST); index_type const* const original_col_indices = A->getColData(memory::HOST); @@ -572,14 +574,12 @@ namespace ReSolve } new_row_pointers[A->getNumRows()] = new_nnz_count; assert(new_nnz_count <= max_nnz_count); + new_col_indices.resize(new_nnz_count); + auto pattern = new ScaleAddIBuffer(std::move(new_row_pointers), std::move(new_col_indices)); + // workspace_ owns pattern + workspace_->setScaleAddIBuffer(pattern); + updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); - updateMatrix(A, new_row_pointers, new_col_indices, new_values, new_nnz_count); - - auto sparsity_pattern = new ScaleAddIBuffer(A->getRowData(memory::HOST), A->getNumRows() + 1, A->getColData(memory::HOST), A->getNnz()); - workspace_->setScaleAddIBuffer(sparsity_pattern); - - delete[] new_row_pointers; - delete[] new_col_indices; delete[] new_values; return 0; diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 27cb85d74..0af3fbf46 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -14,8 +14,8 @@ namespace ReSolve * @param[in] col_data - pointer to column data (array of integers, length: nnz) * @param[in] nnz - number of non-zeros */ - ScaleAddIBuffer::ScaleAddIBuffer(index_type* row_data, index_type nrows, index_type* col_data, index_type nnz) - : row_data_(row_data, row_data + nrows + 1), col_data_(col_data, col_data + nnz) + ScaleAddIBuffer::ScaleAddIBuffer(std::vector row_data, std::vector col_data) + : row_data_(std::move(row_data)), col_data_(std::move(col_data)) { } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index c020285cf..0d97bf602 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -9,7 +9,7 @@ namespace ReSolve class ScaleAddIBuffer { public: - ScaleAddIBuffer(index_type* row_data, index_type nrows, index_type* col_data, index_type nnz); + ScaleAddIBuffer(std::vector row_data, std::vector col_data); index_type* getRowData(); index_type* getColumnData(); index_type getNumRows(); From adcc3b8e1d5acc9c0316f86628db0e3b913bac90 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 16:45:49 -0500 Subject: [PATCH 08/47] fix documentation Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 5 +++-- resolve/workspace/LinAlgWorkspaceCpu.cpp | 10 ++++++++++ tests/unit/matrix/MatrixHandlerTests.hpp | 5 +++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 9b713fcde..cf07f604b 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -421,7 +421,7 @@ namespace ReSolve */ static int updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) { - if (A->destroyMatrixData(memory::HOST) == -1) + if (A->destroyMatrixData(memory::HOST) != 0) { return 1; } @@ -502,7 +502,8 @@ namespace ReSolve */ int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) { - if (index_type info = scaleConst(A, alpha), info == 1) + index_type info = scaleConst(A, alpha); + if (info == 1) { return info; } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 0af3fbf46..908a6e01e 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -19,11 +19,21 @@ namespace ReSolve { } + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ index_type* ScaleAddIBuffer::getRowData() { return row_data_.data(); } + /** + * @brief Retrieve column sparsity pattern + * + * @return precalculated column indices + */ index_type* ScaleAddIBuffer::getColumnData() { return col_data_.data(); diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 80f0fbd78..db1d0a1ce 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -705,9 +705,10 @@ namespace ReSolve * @pre A is a valid, allocated CSR matrix * @invariant A * @param[in] A matrix::Csr* pointer to the matrix to be verified + * @param[in] alpha matrix scale factor * @return bool true if the matrix is valid, false otherwise */ - bool verifyScaleAddICsrMatrix(matrix::Csr* A, real_type scale) + bool verifyScaleAddICsrMatrix(matrix::Csr* A, real_type alpha) { // Check if the matrix is valid if (A == nullptr) @@ -727,7 +728,7 @@ namespace ReSolve index_type* scaled_row_ptr = A->getRowData(memory::HOST); real_type* scaled_value = A->getValues(memory::HOST); - const real_type expected = 30. * scale + 1.; + const real_type expected = 30. * alpha + 1.; // Verify values - each element scaled by scale. Diagonal elements should be 1. for (index_type i = 0; i < n; ++i) From fa0c3601c56949a50de94d6838eff31c97688c96 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 14 Nov 2025 19:25:30 -0500 Subject: [PATCH 09/47] test reuse sparsity pattern Signed-off-by: Steven Hahn --- resolve/workspace/LinAlgWorkspaceCpu.cpp | 1 + tests/unit/matrix/MatrixHandlerTests.hpp | 18 +++++++++++++----- tests/unit/matrix/runMatrixHandlerTests.cpp | 2 ++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 908a6e01e..6af29b03e 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -108,6 +108,7 @@ namespace ReSolve { assert(scaleaddi_buffer_ == nullptr); scaleaddi_buffer_ = buffer; + scaleAddISetupDone(); } } // namespace ReSolve diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index db1d0a1ce..4aeba9de1 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -214,7 +214,17 @@ namespace ReSolve { A->syncData(memory::HOST); } - status *= verifyScaleAddICsrMatrix(A, val); + // expected sum is 2*30+1 + status *= verifyScaleAddICsrMatrix(A, 61); + + // run again to reuse sparsity pattern. + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*61+1 + status *= verifyScaleAddICsrMatrix(A, 123); delete A; return status.report(testname.c_str()); @@ -705,10 +715,10 @@ namespace ReSolve * @pre A is a valid, allocated CSR matrix * @invariant A * @param[in] A matrix::Csr* pointer to the matrix to be verified - * @param[in] alpha matrix scale factor + * @param[in] expected sum of row elements * @return bool true if the matrix is valid, false otherwise */ - bool verifyScaleAddICsrMatrix(matrix::Csr* A, real_type alpha) + bool verifyScaleAddICsrMatrix(matrix::Csr* A, real_type expected) { // Check if the matrix is valid if (A == nullptr) @@ -728,8 +738,6 @@ namespace ReSolve index_type* scaled_row_ptr = A->getRowData(memory::HOST); real_type* scaled_value = A->getValues(memory::HOST); - const real_type expected = 30. * alpha + 1.; - // Verify values - each element scaled by scale. Diagonal elements should be 1. for (index_type i = 0; i < n; ++i) { diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 4e9cbbc90..d705460d0 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -76,8 +76,10 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result result += test.rightScale(1024, 2048); workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); +#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); result += test.scaleAddI(100); +#endif std::cout << "\n"; } From 50266984a4d24807a9c0d5483706aa1cdc7a8e74 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Sat, 15 Nov 2025 00:27:16 +0000 Subject: [PATCH 10/47] Apply pre-commmit fixes --- resolve/matrix/MatrixHandlerCpu.cpp | 4 ++-- resolve/workspace/LinAlgWorkspaceCpu.hpp | 10 +++++----- tests/unit/matrix/MatrixHandlerTests.hpp | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index cf07f604b..87168c76a 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -400,7 +400,7 @@ namespace ReSolve */ static int scaleConst(matrix::Sparse* A, real_type alpha) { - real_type* values = A->getValues(memory::HOST); + real_type* values = A->getValues(memory::HOST); const index_type nnz = A->getNnz(); for (index_type i = 0; i < nnz; ++i) { @@ -517,7 +517,7 @@ namespace ReSolve std::vector new_row_pointers(A->getNumRows() + 1); // At most we add one element per row/column - index_type max_nnz_count = A->getNnz() + A->getNumRows(); + index_type max_nnz_count = A->getNnz() + A->getNumRows(); std::vector new_col_indices(max_nnz_count); auto new_values = new real_type[max_nnz_count]; diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 0d97bf602..3aaa0618a 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -12,9 +12,9 @@ namespace ReSolve ScaleAddIBuffer(std::vector row_data, std::vector col_data); index_type* getRowData(); index_type* getColumnData(); - index_type getNumRows(); - index_type getNumColumns(); - index_type getNnz(); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); private: std::vector row_data_; @@ -26,8 +26,8 @@ namespace ReSolve public: LinAlgWorkspaceCpu(); ~LinAlgWorkspaceCpu(); - void initializeHandles(); - void resetLinAlgWorkspace(); + void initializeHandles(); + void resetLinAlgWorkspace(); bool scaleAddISetup(); void scaleAddISetupDone(); ScaleAddIBuffer* getScaleAddIBuffer(); diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 4aeba9de1..b8c154cba 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -730,13 +730,13 @@ namespace ReSolve const index_type n = A->getNumRows(); const index_type m = A->getNumColumns(); - if(n != m) + if (n != m) { return false; } - index_type* scaled_row_ptr = A->getRowData(memory::HOST); - real_type* scaled_value = A->getValues(memory::HOST); + index_type* scaled_row_ptr = A->getRowData(memory::HOST); + real_type* scaled_value = A->getValues(memory::HOST); // Verify values - each element scaled by scale. Diagonal elements should be 1. for (index_type i = 0; i < n; ++i) From 8b6f599e6ec8fe429ee69de5b7995e29acbad789 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Sun, 16 Nov 2025 20:22:15 -0500 Subject: [PATCH 11/47] use std:vector instead for c-style array Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 87168c76a..bfa66c971 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -437,7 +437,7 @@ namespace ReSolve */ static int addIWithPattern(matrix::Csr* A, ScaleAddIBuffer* pattern) { - auto new_values = new real_type[pattern->getNnz()]; + std::vector new_values(pattern->getNnz()); index_type const* const original_row_pointers = A->getRowData(memory::HOST); index_type const* const original_col_indices = A->getColData(memory::HOST); @@ -487,8 +487,7 @@ namespace ReSolve } assert(new_nnz_count == pattern->getNnz()); - index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); - delete[] new_values; + index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values.data(), pattern->getNnz()); return info; } @@ -519,7 +518,7 @@ namespace ReSolve // At most we add one element per row/column index_type max_nnz_count = A->getNnz() + A->getNumRows(); std::vector new_col_indices(max_nnz_count); - auto new_values = new real_type[max_nnz_count]; + std::vector new_values(max_nnz_count); index_type const* const original_row_pointers = A->getRowData(memory::HOST); index_type const* const original_col_indices = A->getColData(memory::HOST); @@ -579,10 +578,7 @@ namespace ReSolve auto pattern = new ScaleAddIBuffer(std::move(new_row_pointers), std::move(new_col_indices)); // workspace_ owns pattern workspace_->setScaleAddIBuffer(pattern); - updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values, pattern->getNnz()); - - delete[] new_values; - + updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values.data(), pattern->getNnz()); return 0; } } // namespace ReSolve From 100ef28bb64ff6e0e377bfb189276b3cb7720eb2 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 09:25:07 -0500 Subject: [PATCH 12/47] fix GPU build Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 2 +- resolve/matrix/MatrixHandlerHip.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index b56cb8fe0..9c35dcf96 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -409,7 +409,7 @@ namespace ReSolve * @param[in] alpha - constant to the added * @return 0 if successful, 1 otherwise */ - int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + int MatrixHandlerCuda::scaleAddI(matrix::Csr* A, real_type alpha) { // NOT IMPLEMENTED return 1; diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index a7e3de020..24115f7b5 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -377,7 +377,7 @@ namespace ReSolve * @param[in] alpha - constant to the added * @return 0 if successful, 1 otherwise */ - int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + int MatrixHandlerHip::scaleAddI(matrix::Csr* A, real_type alpha) { // NOT IMPLEMENTED return 1; From 6665612498be3235a846b5a39e201ab723e11d8b Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 09:49:20 -0500 Subject: [PATCH 13/47] use camelCase Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 132 +++++++++++------------ resolve/workspace/LinAlgWorkspaceCpu.cpp | 30 +++--- resolve/workspace/LinAlgWorkspaceCpu.hpp | 10 +- 3 files changed, 86 insertions(+), 86 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index bfa66c971..3e3a24f76 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -419,13 +419,13 @@ namespace ReSolve * @param[in] nnz - number of non-zer * @return 0 if successful, 1 otherwise */ - static int updateMatrix(matrix::Sparse* A, index_type* row_data, index_type* col_data, real_type* val_data, index_type nnz) + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) { if (A->destroyMatrixData(memory::HOST) != 0) { return 1; } - return A->copyDataFrom(row_data, col_data, val_data, nnz, memory::HOST, memory::HOST); + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::HOST, memory::HOST); } /** @@ -437,57 +437,57 @@ namespace ReSolve */ static int addIWithPattern(matrix::Csr* A, ScaleAddIBuffer* pattern) { - std::vector new_values(pattern->getNnz()); + std::vector newValues(pattern->getNnz()); - index_type const* const original_row_pointers = A->getRowData(memory::HOST); - index_type const* const original_col_indices = A->getColData(memory::HOST); - real_type const* const original_values = A->getValues(memory::HOST); + index_type const* const originalRowPointers = A->getRowData(memory::HOST); + index_type const* const originalColumnIndices = A->getColData(memory::HOST); + real_type const* const originalValues = A->getValues(memory::HOST); - index_type new_nnz_count = 0; + index_type newNnzCount = 0; for (index_type i = 0; i < A->getNumRows(); ++i) { - const index_type original_row_start = original_row_pointers[i]; - const index_type original_row_end = original_row_pointers[i + 1]; + const index_type originalRowStart = originalRowPointers[i]; + const index_type originalRowEnd = originalRowPointers[i + 1]; - bool diagonal_added = false; - for (index_type j = original_row_start; j < original_row_end; ++j) + bool diagonalAdded = false; + for (index_type j = originalRowStart; j < originalRowEnd; ++j) { - if (original_col_indices[j] == i) + if (originalColumnIndices[j] == i) { // Diagonal element found in original matrix - new_values[new_nnz_count] = original_values[j] + 1.0; - new_nnz_count++; - diagonal_added = true; + newValues[newNnzCount] = originalValues[j] + 1.0; + newNnzCount++; + diagonalAdded = true; } - else if (original_col_indices[j] > i && !diagonal_added) + else if (originalColumnIndices[j] > i && !diagonalAdded) { // Insert diagonal if not found yet - new_values[new_nnz_count] = 1.; - new_nnz_count++; - diagonal_added = true; // Mark as added to prevent re-insertion + newValues[newNnzCount] = 1.; + newNnzCount++; + diagonalAdded = true; // Mark as added to prevent re-insertion // Then add the current original element - new_values[new_nnz_count] = original_values[j]; - new_nnz_count++; + newValues[newNnzCount] = originalValues[j]; + newNnzCount++; } else { // Elements before diagonal, elements after diagonal and the // diagonal is already handled - new_values[new_nnz_count] = original_values[j]; - new_nnz_count++; + newValues[newNnzCount] = originalValues[j]; + newNnzCount++; } } // If diagonal element was not present in original row - if (!diagonal_added) + if (!diagonalAdded) { - new_values[new_nnz_count] = 1.; - new_nnz_count++; + newValues[newNnzCount] = 1.; + newNnzCount++; } } - assert(new_nnz_count == pattern->getNnz()); - index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values.data(), pattern->getNnz()); + assert(newNnzCount == pattern->getNnz()); + index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); return info; } @@ -514,71 +514,71 @@ namespace ReSolve return addIWithPattern(A, pattern); } - std::vector new_row_pointers(A->getNumRows() + 1); + std::vector newRowPointers(A->getNumRows() + 1); // At most we add one element per row/column - index_type max_nnz_count = A->getNnz() + A->getNumRows(); - std::vector new_col_indices(max_nnz_count); - std::vector new_values(max_nnz_count); + index_type maxNnzCount = A->getNnz() + A->getNumRows(); + std::vector newColumnIndices(maxNnzCount); + std::vector newValues(maxNnzCount); - index_type const* const original_row_pointers = A->getRowData(memory::HOST); - index_type const* const original_col_indices = A->getColData(memory::HOST); - real_type const* const original_values = A->getValues(memory::HOST); + index_type const* const originalRowPointers = A->getRowData(memory::HOST); + index_type const* const originalColIndices = A->getColData(memory::HOST); + real_type const* const originalValues = A->getValues(memory::HOST); - index_type new_nnz_count = 0; + index_type newNnzCount = 0; for (index_type i = 0; i < A->getNumRows(); ++i) { - new_row_pointers[i] = new_nnz_count; - const index_type original_row_start = original_row_pointers[i]; - const index_type original_row_end = original_row_pointers[i + 1]; + newRowPointers[i] = newNnzCount; + const index_type originalRowStart = originalRowPointers[i]; + const index_type originalRowEnd = originalRowPointers[i + 1]; - bool diagonal_added = false; - for (index_type j = original_row_start; j < original_row_end; ++j) + bool diagonalAdded = false; + for (index_type j = originalRowStart; j < originalRowEnd; ++j) { - if (original_col_indices[j] == i) + if (originalColIndices[j] == i) { // Diagonal element found in original matrix - new_values[new_nnz_count] = original_values[j] + 1.0; - new_col_indices[new_nnz_count] = i; - new_nnz_count++; - diagonal_added = true; + newValues[newNnzCount] = originalValues[j] + 1.0; + newColumnIndices[newNnzCount] = i; + newNnzCount++; + diagonalAdded = true; } - else if (original_col_indices[j] > i && !diagonal_added) + else if (originalColIndices[j] > i && !diagonalAdded) { // Insert diagonal if not found yet - new_values[new_nnz_count] = 1.; - new_col_indices[new_nnz_count] = i; - new_nnz_count++; - diagonal_added = true; // Mark as added to prevent re-insertion + newValues[newNnzCount] = 1.; + newColumnIndices[newNnzCount] = i; + newNnzCount++; + diagonalAdded = true; // Mark as added to prevent re-insertion // Then add the current original element - new_values[new_nnz_count] = original_values[j]; - new_col_indices[new_nnz_count] = original_col_indices[j]; - new_nnz_count++; + newValues[newNnzCount] = originalValues[j]; + newColumnIndices[newNnzCount] = originalColIndices[j]; + newNnzCount++; } else { // Elements before diagonal, elements after diagonal and the // diagonal is already handled - new_values[new_nnz_count] = original_values[j]; - new_col_indices[new_nnz_count] = original_col_indices[j]; - new_nnz_count++; + newValues[newNnzCount] = originalValues[j]; + newColumnIndices[newNnzCount] = originalColIndices[j]; + newNnzCount++; } } // If diagonal element was not present in original row - if (!diagonal_added) + if (!diagonalAdded) { - new_values[new_nnz_count] = 1.; - new_col_indices[new_nnz_count] = i; - new_nnz_count++; + newValues[newNnzCount] = 1.; + newColumnIndices[newNnzCount] = i; + newNnzCount++; } } - new_row_pointers[A->getNumRows()] = new_nnz_count; - assert(new_nnz_count <= max_nnz_count); - new_col_indices.resize(new_nnz_count); - auto pattern = new ScaleAddIBuffer(std::move(new_row_pointers), std::move(new_col_indices)); + newRowPointers[A->getNumRows()] = newNnzCount; + assert(newNnzCount <= maxNnzCount); + newColumnIndices.resize(newNnzCount); + auto pattern = new ScaleAddIBuffer(std::move(newRowPointers), std::move(newColumnIndices)); // workspace_ owns pattern workspace_->setScaleAddIBuffer(pattern); - updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), new_values.data(), pattern->getNnz()); + updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); return 0; } } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 6af29b03e..5f175eb5a 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -14,8 +14,8 @@ namespace ReSolve * @param[in] col_data - pointer to column data (array of integers, length: nnz) * @param[in] nnz - number of non-zeros */ - ScaleAddIBuffer::ScaleAddIBuffer(std::vector row_data, std::vector col_data) - : row_data_(std::move(row_data)), col_data_(std::move(col_data)) + ScaleAddIBuffer::ScaleAddIBuffer(std::vector rowData, std::vector colData) + : rowData_(std::move(rowData)), colData_(std::move(colData)) { } @@ -26,7 +26,7 @@ namespace ReSolve */ index_type* ScaleAddIBuffer::getRowData() { - return row_data_.data(); + return rowData_.data(); } /** @@ -36,7 +36,7 @@ namespace ReSolve */ index_type* ScaleAddIBuffer::getColumnData() { - return col_data_.data(); + return colData_.data(); } /** @@ -46,7 +46,7 @@ namespace ReSolve */ index_type ScaleAddIBuffer::getNumRows() { - return static_cast(row_data_.size()) - 1; + return static_cast(rowData_.size()) - 1; } /** @@ -66,7 +66,7 @@ namespace ReSolve */ index_type ScaleAddIBuffer::getNnz() { - return static_cast(col_data_.size()); + return static_cast(colData_.size()); } LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() @@ -75,7 +75,7 @@ namespace ReSolve LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() { - delete scaleaddi_buffer_; + delete scaleAddIBuffer_; } void LinAlgWorkspaceCpu::initializeHandles() @@ -84,30 +84,30 @@ namespace ReSolve void LinAlgWorkspaceCpu::resetLinAlgWorkspace() { - delete scaleaddi_buffer_; - scaleaddi_buffer_ = nullptr; + delete scaleAddIBuffer_; + scaleAddIBuffer_ = nullptr; } bool LinAlgWorkspaceCpu::scaleAddISetup() { - return scaleaddi_setup_done_; + return scaleAddISetupDone_; } void LinAlgWorkspaceCpu::scaleAddISetupDone() { - scaleaddi_setup_done_ = true; + scaleAddISetupDone_ = true; } ScaleAddIBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() { - assert(scaleaddi_buffer_ != nullptr); - return scaleaddi_buffer_; + assert(scaleAddIBuffer_ != nullptr); + return scaleAddIBuffer_; } void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddIBuffer* buffer) { - assert(scaleaddi_buffer_ == nullptr); - scaleaddi_buffer_ = buffer; + assert(scaleAddIBuffer_ == nullptr); + scaleAddIBuffer_ = buffer; scaleAddISetupDone(); } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 3aaa0618a..3d409bdbe 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -9,7 +9,7 @@ namespace ReSolve class ScaleAddIBuffer { public: - ScaleAddIBuffer(std::vector row_data, std::vector col_data); + ScaleAddIBuffer(std::vector rowData, std::vector colData); index_type* getRowData(); index_type* getColumnData(); index_type getNumRows(); @@ -17,8 +17,8 @@ namespace ReSolve index_type getNnz(); private: - std::vector row_data_; - std::vector col_data_; + std::vector rowData_; + std::vector colData_; }; class LinAlgWorkspaceCpu @@ -35,8 +35,8 @@ namespace ReSolve private: // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. - bool scaleaddi_setup_done_{false}; - ScaleAddIBuffer* scaleaddi_buffer_{nullptr}; + bool scaleAddISetupDone_{false}; + ScaleAddIBuffer* scaleAddIBuffer_{nullptr}; }; } // namespace ReSolve From c0652a2e2a52a6c59d11446a48f5d4128895fd08 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 09:38:29 -0500 Subject: [PATCH 14/47] checkpoint Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandler.cpp | 28 ++++++++++++++ resolve/matrix/MatrixHandler.hpp | 2 + resolve/matrix/MatrixHandlerCpu.cpp | 56 ++++++++++++++++++++++++++++ resolve/matrix/MatrixHandlerCuda.cpp | 14 +++++++ resolve/matrix/MatrixHandlerCuda.hpp | 2 + resolve/matrix/MatrixHandlerHip.cpp | 14 +++++++ resolve/matrix/MatrixHandlerHip.hpp | 2 + resolve/matrix/MatrixHandlerImpl.hpp | 2 + 8 files changed, 120 insertions(+) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 0a0699c5d..d20b8740b 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -376,6 +376,34 @@ namespace ReSolve return 1; } + /** + * @brief Multiply csr matrix by a constant and add B. + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - scalar parameter + * @param[in] B - Sparse CSR matrix + * @param[in] memspace - Device where the operation is computed + * @return 0 if successful, 1 otherwise + */ + int MatrixHandler::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B, memory::MemorySpace memspace) + { + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix is assumed to be in CSR format.\n"); + assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); + assert(A->getNumRows() == A->getNumColumns() && "Matrix must be square.\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->scaleAddB(A, alpha, B); + break; + case DEVICE: + return devImpl_->scaleAddB(A, alpha, B); + break; + } + return 1; + } + } + + /** * @brief If CUDA support is enabled in the handler. * diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 312d02e68..cd7fdcc89 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -64,6 +64,8 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace); + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B, memory::MemorySpace memspace); + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped int matvec(matrix::Sparse* A, vector_type* vec_x, diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 3e3a24f76..ad9ec7042 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -581,4 +581,60 @@ namespace ReSolve updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); return 0; } + + /** + * @brief Multiply csr matrix by a constant and add B. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + { + index_type info = scaleConst(A, alpha); + if (info == 1) + { + return info; + } + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); + return addIWithPattern(A, pattern); + } + + std::vector newRowPointers(A->getNumRows() + 1); + std::vector newColIndices(); + std::vector newValues(); + + index_type const* const aRowPointers = A->getRowData(memory::HOST); + index_type const* const aColIndices = A->getColData(memory::HOST); + real_type const* const aValues = A->getValues(memory::HOST); + + index_type const* const bRowPointers = B->getRowData(memory::HOST); + index_type const* const bColIndices = B->getColData(memory::HOST); + real_type const* const bValues = B->getValues(memory::HOST); + + index_type newNnzCount = 0; + for (index_type i = 0; i < A->getNumRows(); ++i) + { + newRowPointers.push_back(newNnzCount); + const index_type aRowStart = aRowPointers[i]; + const index_type aRowEnd = aRowPointers[i + 1]; + + const index_type bRowStart = bRowPointers[i]; + const index_type bRowEnd = bRowPointers[i + 1]; + + for (index_type j = aRowStart; j < aRowEnd; ++j) + { + + } + + + + return 1; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 9c35dcf96..bcffef79c 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -415,4 +415,18 @@ namespace ReSolve return 1; } + /** + * @brief Multiply csr matrix by a constant and add B. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + { + // NOT IMPLEMENTED + return 1; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index b85c39faf..570ea6edf 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -47,6 +47,8 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha) override; + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 24115f7b5..ba0b6039d 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -383,4 +383,18 @@ namespace ReSolve return 1; } + /** + * @brief Multiply csr matrix by a constant and add B. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + { + // NOT IMPLEMENTED + return 1; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 8f14cc42f..400542c79 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -47,6 +47,8 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha) override; + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index 771ab4839..80fbb631a 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -48,6 +48,8 @@ namespace ReSolve virtual int scaleAddI(matrix::Csr* A, real_type alpha) = 0; + virtual int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) = 0; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, From a6efc949d3a38c09b28a89082f7d1b503d8c67ba Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 10:58:32 -0500 Subject: [PATCH 15/47] scaleAddB passing test Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandler.cpp | 2 - resolve/matrix/MatrixHandlerCpu.cpp | 91 +++++++++++++++------ resolve/matrix/MatrixHandlerCpu.hpp | 2 + tests/unit/matrix/MatrixHandlerTests.hpp | 30 +++++++ tests/unit/matrix/runMatrixHandlerTests.cpp | 2 + 5 files changed, 99 insertions(+), 28 deletions(-) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index d20b8740b..fa2dd6c38 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -401,8 +401,6 @@ namespace ReSolve } return 1; } - } - /** * @brief If CUDA support is enabled in the handler. diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index ad9ec7042..1fbeb1ada 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -590,51 +590,90 @@ namespace ReSolve * @param[in] B - Sparse CSR matrix * @return 0 if successful, 1 otherwise */ - int MatrixHandlerCpu::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + int MatrixHandlerCpu::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - index_type info = scaleConst(A, alpha); + index_type info = scaleConst(A, alpha); if (info == 1) { return info; } // Reuse sparsity pattern if it is available - if (workspace_->scaleAddISetup()) - { - ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); - return addIWithPattern(A, pattern); - } - - std::vector newRowPointers(A->getNumRows() + 1); - std::vector newColIndices(); - std::vector newValues(); + // if (workspace_->scaleAddISetup()) + //{ + // ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); + // return addIWithPattern(A, pattern); + //} index_type const* const aRowPointers = A->getRowData(memory::HOST); index_type const* const aColIndices = A->getColData(memory::HOST); - real_type const* const aValues = A->getValues(memory::HOST); + real_type const* const aValues = A->getValues(memory::HOST); index_type const* const bRowPointers = B->getRowData(memory::HOST); index_type const* const bColIndices = B->getColData(memory::HOST); - real_type const* const bValues = B->getValues(memory::HOST); + real_type const* const bValues = B->getValues(memory::HOST); - index_type newNnzCount = 0; + std::vector newRowPointers; + newRowPointers.reserve(A->getNumRows() + 1); + std::vector newColIndices; + std::vector newValues; + + newRowPointers.push_back(0); // First element is always 0 for (index_type i = 0; i < A->getNumRows(); ++i) { - newRowPointers.push_back(newNnzCount); - const index_type aRowStart = aRowPointers[i]; - const index_type aRowEnd = aRowPointers[i + 1]; + const index_type startA = aRowPointers[i]; + const index_type endA = aRowPointers[i + 1]; + const index_type startB = bRowPointers[i]; + const index_type endB = bRowPointers[i + 1]; - const index_type bRowStart = bRowPointers[i]; - const index_type bRowEnd = bRowPointers[i + 1]; + index_type ptrA = startA; + index_type ptrB = startB; - for (index_type j = aRowStart; j < aRowEnd; ++j) + // Merge the non-zero elements for the current row + while (ptrA < endA || ptrB < endB) { - + // Case 1: All remaining elements are in A + if (ptrB >= endB) + { + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(aValues[ptrA]); + ptrA++; + } + // Case 2: All remaining elements are in B + else if (ptrA >= endA) + { + newColIndices.push_back(bColIndices[ptrB]); + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 3: Elements in both A and B. Compare column indices. + else + { + if (aColIndices[ptrA] < bColIndices[ptrB]) + { + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(aValues[ptrA]); + ptrA++; + } + else if (bColIndices[ptrB] < aColIndices[ptrA]) + { + newColIndices.push_back(bColIndices[ptrB]); + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 4: Same column index, add values + else + { + real_type sum = aValues[ptrA] + bValues[ptrB]; + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(sum); + ptrA++; + ptrB++; + } + } } - - - - return 1; + newRowPointers.push_back(static_cast(newValues.size())); + } + return updateMatrix(A, newRowPointers.data(), newColIndices.data(), newValues.data(), static_cast(newValues.size())); } - } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 77a4b717c..a6dc5f259 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -48,6 +48,8 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha) override; + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B); + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index b8c154cba..48b61d8a6 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -230,6 +230,36 @@ namespace ReSolve return status.report(testname.c_str()); } + TestOutcome scaleAddB(index_type n) + { + TestStatus status; + std::string testname(__func__); + matrix::Csr* A = createCsrMatrix(n); + matrix::Csr* B = createCsrMatrix(n); + real_type val = 2.; + + handler_.scaleAddB(A, val, B, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*30+30 + status *= verifyScaleAddICsrMatrix(A, 90); + + // run again to reuse sparsity pattern. + handler_.scaleAddB(A, val, B, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*90+30 + status *= verifyScaleAddICsrMatrix(A, 210); + + delete A; + delete B; + return status.report(testname.c_str()); + } + private: ReSolve::MatrixHandler& handler_; memory::MemorySpace memspace_{memory::HOST}; diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index d705460d0..398130e80 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -79,6 +79,8 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result #if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); result += test.scaleAddI(100); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddB(100); #endif std::cout << "\n"; } From 6cd01c109ae22a26d084a855a424f84576b07616 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 11:10:44 -0500 Subject: [PATCH 16/47] checkpoint --- resolve/matrix/MatrixHandlerCpu.cpp | 6 +++--- resolve/workspace/LinAlgWorkspaceCpu.cpp | 19 +++++++++++-------- resolve/workspace/LinAlgWorkspaceCpu.hpp | 18 +++++++++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 1fbeb1ada..1bb0e1378 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -435,7 +435,7 @@ namespace ReSolve * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ - static int addIWithPattern(matrix::Csr* A, ScaleAddIBuffer* pattern) + static int addIWithPattern(matrix::Csr* A, ScaleAddBuffer* pattern) { std::vector newValues(pattern->getNnz()); @@ -510,7 +510,7 @@ namespace ReSolve // Reuse sparsity pattern if it is available if (workspace_->scaleAddISetup()) { - ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); + ScaleAddBuffer* pattern = workspace_->getScaleAddIBuffer(); return addIWithPattern(A, pattern); } @@ -575,7 +575,7 @@ namespace ReSolve newRowPointers[A->getNumRows()] = newNnzCount; assert(newNnzCount <= maxNnzCount); newColumnIndices.resize(newNnzCount); - auto pattern = new ScaleAddIBuffer(std::move(newRowPointers), std::move(newColumnIndices)); + auto pattern = new ScaleAddBuffer(std::move(newRowPointers), std::move(newColumnIndices)); // workspace_ owns pattern workspace_->setScaleAddIBuffer(pattern); updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 5f175eb5a..278f77d6c 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -14,7 +14,7 @@ namespace ReSolve * @param[in] col_data - pointer to column data (array of integers, length: nnz) * @param[in] nnz - number of non-zeros */ - ScaleAddIBuffer::ScaleAddIBuffer(std::vector rowData, std::vector colData) + ScaleAddBuffer::ScaleAddBuffer(std::vector rowData, std::vector colData) : rowData_(std::move(rowData)), colData_(std::move(colData)) { } @@ -24,7 +24,7 @@ namespace ReSolve * * @return precalculated row pointers */ - index_type* ScaleAddIBuffer::getRowData() + index_type* ScaleAddBuffer::getRowData() { return rowData_.data(); } @@ -34,7 +34,7 @@ namespace ReSolve * * @return precalculated column indices */ - index_type* ScaleAddIBuffer::getColumnData() + index_type* ScaleAddBuffer::getColumnData() { return colData_.data(); } @@ -44,7 +44,7 @@ namespace ReSolve * * @return number of matrix rows. */ - index_type ScaleAddIBuffer::getNumRows() + index_type ScaleAddBuffer::getNumRows() { return static_cast(rowData_.size()) - 1; } @@ -54,7 +54,7 @@ namespace ReSolve * * @return number of matrix columns. */ - index_type ScaleAddIBuffer::getNumColumns() + index_type ScaleAddBuffer::getNumColumns() { return getNumRows(); } @@ -64,7 +64,7 @@ namespace ReSolve * * @return number of non-zeros */ - index_type ScaleAddIBuffer::getNnz() + index_type ScaleAddBuffer::getNnz() { return static_cast(colData_.size()); } @@ -76,6 +76,7 @@ namespace ReSolve LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() { delete scaleAddIBuffer_; + delete scaleAddBBuffer_; } void LinAlgWorkspaceCpu::initializeHandles() @@ -86,6 +87,8 @@ namespace ReSolve { delete scaleAddIBuffer_; scaleAddIBuffer_ = nullptr; + delete scaleAddBBuffer_; + scaleAddBBuffer_ = nullptr; } bool LinAlgWorkspaceCpu::scaleAddISetup() @@ -98,13 +101,13 @@ namespace ReSolve scaleAddISetupDone_ = true; } - ScaleAddIBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() + ScaleAddBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() { assert(scaleAddIBuffer_ != nullptr); return scaleAddIBuffer_; } - void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddIBuffer* buffer) + void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddBuffer* buffer) { assert(scaleAddIBuffer_ == nullptr); scaleAddIBuffer_ = buffer; diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 3d409bdbe..0e073ffe7 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -6,10 +6,10 @@ namespace ReSolve { - class ScaleAddIBuffer + class ScaleAddBuffer { public: - ScaleAddIBuffer(std::vector rowData, std::vector colData); + ScaleAddBuffer(std::vector rowData, std::vector colData); index_type* getRowData(); index_type* getColumnData(); index_type getNumRows(); @@ -30,13 +30,21 @@ namespace ReSolve void resetLinAlgWorkspace(); bool scaleAddISetup(); void scaleAddISetupDone(); - ScaleAddIBuffer* getScaleAddIBuffer(); - void setScaleAddIBuffer(ScaleAddIBuffer* buffer); + ScaleAddBuffer* getScaleAddIBuffer(); + void setScaleAddIBuffer(ScaleAddBuffer* buffer); + bool scaleAddBSetup(); + void scaleAddBSetupDone(); + ScaleAddBuffer* getScaleAddBBuffer(); + void setScaleAddBBuffer(ScaleAddBuffer* buffer); + private: // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. bool scaleAddISetupDone_{false}; - ScaleAddIBuffer* scaleAddIBuffer_{nullptr}; + ScaleAddBuffer* scaleAddIBuffer_{nullptr}; + // check if setup is done for scaleAddB i.e. if buffer is allocated, csr structure is set etc. + bool scaleAddBSetupDone_{false}; + ScaleAddBuffer* scaleAddBBuffer_{nullptr}; }; } // namespace ReSolve From 5cfa679c1d8e8ac73b2f4a2527e7372a874dc8fe Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 11:32:36 -0500 Subject: [PATCH 17/47] working? Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 90 ++++++++++++++++++++++-- resolve/workspace/LinAlgWorkspaceCpu.cpp | 24 +++++++ 2 files changed, 108 insertions(+), 6 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 1bb0e1378..cd1823179 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -582,6 +582,80 @@ namespace ReSolve return 0; } + /** + * @brief Add the identity to a CSR matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in,out] B - Sparse CSR matrix + * @param[in] pattern - precalculated sparsity pattern + * @return 0 if successful, 1 otherwise + */ + static int addBWithPattern(matrix::Csr* A, matrix::Csr* B, ScaleAddBuffer* pattern) + { + index_type const* const aRowPointers = A->getRowData(memory::HOST); + index_type const* const aColIndices = A->getColData(memory::HOST); + real_type const* const aValues = A->getValues(memory::HOST); + + index_type const* const bRowPointers = B->getRowData(memory::HOST); + index_type const* const bColIndices = B->getColData(memory::HOST); + real_type const* const bValues = B->getValues(memory::HOST); + + std::vector newValues; + newValues.reserve(pattern->getNnz()); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + const index_type startA = aRowPointers[i]; + const index_type endA = aRowPointers[i + 1]; + const index_type startB = bRowPointers[i]; + const index_type endB = bRowPointers[i + 1]; + + index_type ptrA = startA; + index_type ptrB = startB; + + // Merge the non-zero elements for the current row + while (ptrA < endA || ptrB < endB) + { + // Case 1: All remaining elements are in A + if (ptrB >= endB) + { + newValues.push_back(aValues[ptrA]); + ptrA++; + } + // Case 2: All remaining elements are in B + else if (ptrA >= endA) + { + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 3: Elements in both A and B. Compare column indices. + else + { + if (aColIndices[ptrA] < bColIndices[ptrB]) + { + newValues.push_back(aValues[ptrA]); + ptrA++; + } + else if (bColIndices[ptrB] < aColIndices[ptrA]) + { + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 4: Same column index, add values + else + { + real_type sum = aValues[ptrA] + bValues[ptrB]; + newValues.push_back(sum); + ptrA++; + ptrB++; + } + } + } + } + assert(static_cast(newValues.size()) == pattern->getNnz()); + return updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); + } + /** * @brief Multiply csr matrix by a constant and add B. * @@ -599,11 +673,11 @@ namespace ReSolve } // Reuse sparsity pattern if it is available - // if (workspace_->scaleAddISetup()) - //{ - // ScaleAddIBuffer* pattern = workspace_->getScaleAddIBuffer(); - // return addIWithPattern(A, pattern); - //} + if (workspace_->scaleAddBSetup()) + { + ScaleAddBuffer* pattern = workspace_->getScaleAddBBuffer(); + return addBWithPattern(A, B, pattern); + } index_type const* const aRowPointers = A->getRowData(memory::HOST); index_type const* const aColIndices = A->getColData(memory::HOST); @@ -674,6 +748,10 @@ namespace ReSolve } newRowPointers.push_back(static_cast(newValues.size())); } - return updateMatrix(A, newRowPointers.data(), newColIndices.data(), newValues.data(), static_cast(newValues.size())); + + auto pattern = new ScaleAddBuffer(std::move(newRowPointers), std::move(newColIndices)); + // workspace_ owns pattern + workspace_->setScaleAddBBuffer(pattern); + return updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); } } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 278f77d6c..673f148e7 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -109,9 +109,33 @@ namespace ReSolve void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddBuffer* buffer) { + assert(buffer != nullptr); assert(scaleAddIBuffer_ == nullptr); scaleAddIBuffer_ = buffer; scaleAddISetupDone(); } + bool LinAlgWorkspaceCpu::scaleAddBSetup() + { + return scaleAddBSetupDone_; + } + + void LinAlgWorkspaceCpu::scaleAddBSetupDone() + { + scaleAddBSetupDone_ = true; + } + + ScaleAddBuffer* LinAlgWorkspaceCpu::getScaleAddBBuffer() + { + assert(scaleAddBBuffer_ != nullptr); + return scaleAddBBuffer_; + } + + void LinAlgWorkspaceCpu::setScaleAddBBuffer(ScaleAddBuffer* buffer) + { + assert(scaleAddBBuffer_ == nullptr); + scaleAddBBuffer_ = buffer; + scaleAddBSetupDone(); + } + } // namespace ReSolve From 2bc776290ae037797a19322abeb302bce63848ef Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 11:41:12 -0500 Subject: [PATCH 18/47] silence CUDA/HIP warnings --- resolve/matrix/MatrixHandlerCuda.cpp | 5 +++++ resolve/matrix/MatrixHandlerHip.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index bcffef79c..00c5fd11f 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -411,6 +411,8 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddI(matrix::Csr* A, real_type alpha) { + (void)A; + (void)alpha; // NOT IMPLEMENTED return 1; } @@ -425,6 +427,9 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) { + (void)A; + (void)alpha; + (void)B; // NOT IMPLEMENTED return 1; } diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ba0b6039d..ba4fe73ea 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -379,6 +379,8 @@ namespace ReSolve */ int MatrixHandlerHip::scaleAddI(matrix::Csr* A, real_type alpha) { + (void)A; + (void)alpha; // NOT IMPLEMENTED return 1; } @@ -393,6 +395,9 @@ namespace ReSolve */ int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) { + (void)A; + (void)alpha; + (void)B; // NOT IMPLEMENTED return 1; } From bdf049a5a768d410d2ce8e94fb9a179c50e50001 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Mon, 17 Nov 2025 16:42:25 +0000 Subject: [PATCH 19/47] Apply pre-commmit fixes --- resolve/matrix/MatrixHandlerCuda.cpp | 12 ++++++------ resolve/matrix/MatrixHandlerHip.cpp | 12 ++++++------ resolve/workspace/LinAlgWorkspaceCpu.hpp | 21 ++++++++++----------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 00c5fd11f..e4c34eed7 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -411,8 +411,8 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddI(matrix::Csr* A, real_type alpha) { - (void)A; - (void)alpha; + (void) A; + (void) alpha; // NOT IMPLEMENTED return 1; } @@ -425,11 +425,11 @@ namespace ReSolve * @param[in] B - Sparse CSR matrix * @return 0 if successful, 1 otherwise */ - int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - (void)A; - (void)alpha; - (void)B; + (void) A; + (void) alpha; + (void) B; // NOT IMPLEMENTED return 1; } diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ba4fe73ea..3011ea17e 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -379,8 +379,8 @@ namespace ReSolve */ int MatrixHandlerHip::scaleAddI(matrix::Csr* A, real_type alpha) { - (void)A; - (void)alpha; + (void) A; + (void) alpha; // NOT IMPLEMENTED return 1; } @@ -393,11 +393,11 @@ namespace ReSolve * @param[in] B - Sparse CSR matrix * @return 0 if successful, 1 otherwise */ - int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr *B) + int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - (void)A; - (void)alpha; - (void)B; + (void) A; + (void) alpha; + (void) B; // NOT IMPLEMENTED return 1; } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 0e073ffe7..e2045e4fc 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -26,24 +26,23 @@ namespace ReSolve public: LinAlgWorkspaceCpu(); ~LinAlgWorkspaceCpu(); - void initializeHandles(); - void resetLinAlgWorkspace(); - bool scaleAddISetup(); - void scaleAddISetupDone(); + void initializeHandles(); + void resetLinAlgWorkspace(); + bool scaleAddISetup(); + void scaleAddISetupDone(); ScaleAddBuffer* getScaleAddIBuffer(); - void setScaleAddIBuffer(ScaleAddBuffer* buffer); - bool scaleAddBSetup(); - void scaleAddBSetupDone(); + void setScaleAddIBuffer(ScaleAddBuffer* buffer); + bool scaleAddBSetup(); + void scaleAddBSetupDone(); ScaleAddBuffer* getScaleAddBBuffer(); - void setScaleAddBBuffer(ScaleAddBuffer* buffer); - + void setScaleAddBBuffer(ScaleAddBuffer* buffer); private: // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. - bool scaleAddISetupDone_{false}; + bool scaleAddISetupDone_{false}; ScaleAddBuffer* scaleAddIBuffer_{nullptr}; // check if setup is done for scaleAddB i.e. if buffer is allocated, csr structure is set etc. - bool scaleAddBSetupDone_{false}; + bool scaleAddBSetupDone_{false}; ScaleAddBuffer* scaleAddBBuffer_{nullptr}; }; From d06ecc93a2e91e56c1c704b974c3f6be604bf1be Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 20:32:35 -0500 Subject: [PATCH 20/47] Simpler test found missing boolean flag Signed-off-by: Steven Hahn --- resolve/workspace/LinAlgWorkspaceCpu.cpp | 2 + tests/unit/matrix/MatrixHandlerTests.hpp | 59 +++++++++++++++++++++ tests/unit/matrix/runMatrixHandlerTests.cpp | 2 + 3 files changed, 63 insertions(+) diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 673f148e7..dbf0fb085 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -87,8 +87,10 @@ namespace ReSolve { delete scaleAddIBuffer_; scaleAddIBuffer_ = nullptr; + scaleAddISetupDone_ = false; delete scaleAddBBuffer_; scaleAddBBuffer_ = nullptr; + scaleAddBSetupDone_ = false; } bool LinAlgWorkspaceCpu::scaleAddISetup() diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 48b61d8a6..047856f45 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -230,6 +230,34 @@ namespace ReSolve return status.report(testname.c_str()); } + TestOutcome scaleAddIZero(index_type n) + { + TestStatus status; + std::string testname(__func__); + matrix::Csr* A = createCsrEmptyMatrix(n); + real_type val = 2.; + + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 1 + status *= verifyScaleAddICsrMatrix(A, 1); + + // run again to reuse sparsity pattern. + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*1+1 + status *= verifyScaleAddICsrMatrix(A, 3); + + delete A; + return status.report(testname.c_str()); + } + TestOutcome scaleAddB(index_type n) { TestStatus status; @@ -577,6 +605,37 @@ namespace ReSolve return true; } + /** + * @brief Create a CSR matrix with preset sparsity structure + * + * The sparisty structure is such that each row has a different number of nonzeros + * The values are chosen so that the sum of each row is 30 + * + * @param[in] n number of rows and columns + * + * @return matrix::Csr* + */ + matrix::Csr* createCsrEmptyMatrix(const index_type n) + { + index_type nnz = 0; + matrix::Csr* A = new matrix::Csr(n, n, nnz); + A->allocateMatrixData(memory::HOST); + + index_type* rowptr = A->getRowData(memory::HOST); + + for (index_type i = 0; i < n + 1; ++i) + { + rowptr[i] = 0; + } + A->setUpdated(memory::HOST); + + if (memspace_ == memory::DEVICE) + { + A->syncData(memspace_); + } + return A; + } + /** * @brief Create a CSR matrix with preset sparsity structure * diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 398130e80..3f8cb2393 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -81,6 +81,8 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result result += test.scaleAddI(100); workspace.resetLinAlgWorkspace(); result += test.scaleAddB(100); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddIZero(100); #endif std::cout << "\n"; } From 42af7a854c32652c3b420113b881befbb2947a71 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Tue, 18 Nov 2025 01:35:13 +0000 Subject: [PATCH 21/47] Apply pre-commmit fixes --- resolve/workspace/LinAlgWorkspaceCpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index dbf0fb085..d80e6264f 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -86,10 +86,10 @@ namespace ReSolve void LinAlgWorkspaceCpu::resetLinAlgWorkspace() { delete scaleAddIBuffer_; - scaleAddIBuffer_ = nullptr; + scaleAddIBuffer_ = nullptr; scaleAddISetupDone_ = false; delete scaleAddBBuffer_; - scaleAddBBuffer_ = nullptr; + scaleAddBBuffer_ = nullptr; scaleAddBSetupDone_ = false; } From c32eee3f41046725f38459467a26945173876520 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 15:57:36 -0500 Subject: [PATCH 22/47] CUDA implementation not working! Signed-off-by: Steven Hahn --- resolve/MemoryUtils.hpp | 2 +- resolve/matrix/MatrixHandlerCuda.cpp | 179 +++++++++++++++++++- resolve/matrix/MatrixHandlerCuda.hpp | 5 + resolve/workspace/LinAlgWorkspaceCUDA.cpp | 33 ++++ resolve/workspace/LinAlgWorkspaceCUDA.hpp | 10 ++ tests/unit/matrix/runMatrixHandlerTests.cpp | 2 +- 6 files changed, 225 insertions(+), 6 deletions(-) diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 702d80ea6..ff0e9055d 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -38,7 +38,7 @@ namespace ReSolve * * @brief Provides basic memory allocation, free and copy functions. * - * This class provedes abstractions for memory management functiosn for + * This class provedes abstractions for memory management functions for * different GPU programming models. * * @tparam Policy - Memory management policy (vendor specific) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index e4c34eed7..10ae8f4a4 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -1,6 +1,7 @@ #include "MatrixHandlerCuda.hpp" #include +#include #include #include @@ -401,6 +402,152 @@ namespace ReSolve return 0; } + void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, + real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add) + { + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = A->getValues(memory::DEVICE); + auto b_i = A->getRowData(memory::DEVICE); + auto b_j = A->getColData(memory::DEVICE); + + size_t buffer_byte_size_add; + index_type nnz_total; + + auto handle = workspace_->getCusparseHandle(); + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + real_type* c_v = nullptr; + index_type* c_i = nullptr; + index_type* c_j = nullptr; + + mem_.allocateArrayOnDevice(&c_i, n + 1); + // calculates sum buffer + cusparseDcsrgeam2_bufferSizeExt(handle, + m, + n, + &alpha, + descr_a, + nnz_a, + a_v, + a_i, + a_j, + &beta, + descr_a, + nnz_b, + b_v, + b_i, + b_j, + descr_a, + c_v, + c_i, + c_j, + &buffer_byte_size_add); + + mem_.allocateBufferOnDevice(buffer_add, buffer_byte_size_add); + + // determines sum row offsets and total number of nonzeros + cusparseXcsrgeam2Nnz(handle, + m, + n, + descr_a, + nnz_a, + a_i, + a_j, + descr_a, + nnz_b, + b_i, + b_j, + descr_a, + c_i, + &nnz_total, + *buffer_add); + + C->setNnz(nnz_total); + C->allocateMatrixData(memory::DEVICE); + mem_.copyArrayDeviceToDevice(C->getRowData(memory::DEVICE), c_i, n + 1); + C->setUpdated(memory::DEVICE); + mem_.deleteOnDevice(c_i); + } + + void MatrixHandlerCuda::compute_sum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add) + { + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = A->getValues(memory::DEVICE); + auto b_i = A->getRowData(memory::DEVICE); + auto b_j = A->getColData(memory::DEVICE); + + auto c_v = C->getValues(memory::DEVICE); + auto c_i = C->getRowData(memory::DEVICE); + auto c_j = C->getColData(memory::DEVICE); + + auto handle = workspace_->getCusparseHandle(); + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + assert(m == C->getNumRows()); + + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + assert(n == C->getNumColumns()); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + cusparseDcsrgeam2(handle, + m, + n, + &alpha, + descr_a, + nnz_a, + a_v, + a_i, + a_j, + &beta, + descr_a, + nnz_b, + b_v, + b_i, + b_j, + descr_a, + c_v, + c_i, + c_j, + *buffer_add); + } + + /** + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) + { + if (A->destroyMatrixData(memory::DEVICE) != 0) + { + return 1; + } + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::DEVICE, memory::DEVICE); + } + /** * @brief Add a constant to the nonzero values of a csr matrix, * then add the identity matrix. @@ -411,10 +558,34 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddI(matrix::Csr* A, real_type alpha) { - (void) A; - (void) alpha; - // NOT IMPLEMENTED - return 1; + index_type n = A->getNumRows(); + + std::vector I_i(n + 1); + std::iota(I_i.begin(), I_i.end(), 0); + + std::vector I_j(n); + std::iota(I_j.begin(), I_j.end(), 0); + + std::vector I_v(n, 1.0); + + matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); + I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); + + cusparseMatDescr_t descr_a; + cusparseCreateMatDescr(&descr_a); + cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); + + void* buffer_add{nullptr}; + + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + allocateForSum(A, alpha, &I, 1., &C, descr_a, &buffer_add); + + compute_sum(A, alpha, &I, 1., &C, descr_a, &buffer_add); + + updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + + return 0; } /** diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 570ea6edf..b1051cd7b 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -1,4 +1,7 @@ #pragma once + +#include + #include #include #include @@ -59,6 +62,8 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: + void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add); + void compute_sum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add); LinAlgWorkspaceCUDA* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index fdb19c303..f545cdc75 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -8,6 +8,7 @@ namespace ReSolve handle_cusparse_ = nullptr; handle_cublas_ = nullptr; buffer_spmv_ = nullptr; + buffer_scale_add_ = nullptr; buffer_1norm_ = nullptr; transpose_workspace_ = nullptr; transpose_workspace_ready_ = false; @@ -21,6 +22,8 @@ namespace ReSolve { if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_); + if (buffer_scale_add_ != nullptr) + mem_.deleteOnDevice(buffer_scale_add_); if (d_r_size_ != 0) mem_.deleteOnDevice(d_r_); if (norm_buffer_ready_) @@ -83,6 +86,11 @@ namespace ReSolve return buffer_1norm_; } + void* LinAlgWorkspaceCUDA::getScaleAddBuffer() + { + return buffer_scale_add_; + } + void* LinAlgWorkspaceCUDA::getTransposeBufferWorkspace() { return transpose_workspace_; @@ -114,6 +122,11 @@ namespace ReSolve buffer_spmv_ = buffer; } + void LinAlgWorkspaceCUDA::setScaleAddBuffer(void* buffer) + { + buffer_scale_add_ = buffer; + } + void LinAlgWorkspaceCUDA::setNormBuffer(void* buffer) { buffer_1norm_ = buffer; @@ -169,11 +182,21 @@ namespace ReSolve return mat_A_; } + cusparseMatDescr_t LinAlgWorkspaceCUDA::getScaleAddMatrixDescriptor() + { + return mat_B_; + } + void LinAlgWorkspaceCUDA::setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat) { mat_A_ = mat; } + void LinAlgWorkspaceCUDA::setScaleAddMatrixDescriptor(cusparseMatDescr_t mat) + { + mat_B_ = mat; + } + cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() { return vec_x_; @@ -204,6 +227,16 @@ namespace ReSolve matvec_setup_done_ = true; } + bool LinAlgWorkspaceCUDA::scaleAddSetup() + { + return scale_add_setup_done_; + } + + void LinAlgWorkspaceCUDA::scaleAddSetupDone() + { + scale_add_setup_done_ = true; + } + void LinAlgWorkspaceCUDA::initializeHandles() { cusparseCreate(&handle_cusparse_); diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index ac996b9bb..f566b50c8 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -19,16 +19,19 @@ namespace ReSolve // accessors void* getSpmvBuffer(); void* getNormBuffer(); + void* getScaleAddBuffer(); void* getTransposeBufferWorkspace(); void setTransposeBufferWorkspace(size_t bufferSize); bool isTransposeBufferAllocated(); void setSpmvBuffer(void* buffer); void setNormBuffer(void* buffer); + void setScaleAddBuffer(void* buffer); cublasHandle_t getCublasHandle(); cusolverSpHandle_t getCusolverSpHandle(); // needed for 1-norms etc cusparseHandle_t getCusparseHandle(); cusparseSpMatDescr_t getSpmvMatrixDescriptor(); + cusparseMatDescr_t getScaleAddMatrixDescriptor(); cusparseDnVecDescr_t getVecX(); cusparseDnVecDescr_t getVecY(); index_type getDrSize(); @@ -39,6 +42,7 @@ namespace ReSolve void setCusolverSpHandle(cusolverSpHandle_t handle); void setCusparseHandle(cusparseHandle_t handle); void setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat); + void setScaleAddMatrixDescriptor(cusparseMatDescr_t mat); void setDrSize(index_type new_sz); void setDr(real_type* new_dr); void setNormBufferState(bool r); @@ -48,6 +52,9 @@ namespace ReSolve bool matvecSetup(); void matvecSetupDone(); + bool scaleAddSetup(); + void scaleAddSetupDone(); + private: // handles cublasHandle_t handle_cublas_; @@ -56,6 +63,7 @@ namespace ReSolve // matrix descriptors cusparseSpMatDescr_t mat_A_; + cusparseMatDescr_t mat_B_; // vector descriptors cusparseDnVecDescr_t vec_x_; @@ -64,8 +72,10 @@ namespace ReSolve // buffers void* buffer_spmv_{nullptr}; void* buffer_1norm_{nullptr}; + void* buffer_scale_add_{nullptr}; bool matvec_setup_done_{false}; // check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. + bool scale_add_setup_done_{false}; void* transpose_workspace_{nullptr}; // needed for transpose bool transpose_workspace_ready_{false}; // to track if allocated diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 3f8cb2393..963de6583 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -76,9 +76,9 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result result += test.rightScale(1024, 2048); workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); -#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); result += test.scaleAddI(100); +#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); From 3f10bc4220b74127c6f3bcc5514238a7e93723ef Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 16:23:05 -0500 Subject: [PATCH 23/47] memory leak Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 65 +++++----------------------- 1 file changed, 10 insertions(+), 55 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 10ae8f4a4..73e6cb1b1 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -423,6 +423,9 @@ namespace ReSolve assert(m == B->getNumRows()); index_type n = A->getNumColumns(); assert(n == B->getNumColumns()); + + assert(m == n); + index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); @@ -432,45 +435,12 @@ namespace ReSolve mem_.allocateArrayOnDevice(&c_i, n + 1); // calculates sum buffer - cusparseDcsrgeam2_bufferSizeExt(handle, - m, - n, - &alpha, - descr_a, - nnz_a, - a_v, - a_i, - a_j, - &beta, - descr_a, - nnz_b, - b_v, - b_i, - b_j, - descr_a, - c_v, - c_i, - c_j, - &buffer_byte_size_add); + cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); mem_.allocateBufferOnDevice(buffer_add, buffer_byte_size_add); // determines sum row offsets and total number of nonzeros - cusparseXcsrgeam2Nnz(handle, - m, - n, - descr_a, - nnz_a, - a_i, - a_j, - descr_a, - nnz_b, - b_i, - b_j, - descr_a, - c_i, - &nnz_total, - *buffer_add); + cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, c_i, &nnz_total, *buffer_add); C->setNnz(nnz_total); C->allocateMatrixData(memory::DEVICE); @@ -504,29 +474,12 @@ namespace ReSolve assert(n == B->getNumColumns()); assert(n == C->getNumColumns()); + assert(m == n); + index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - cusparseDcsrgeam2(handle, - m, - n, - &alpha, - descr_a, - nnz_a, - a_v, - a_i, - a_j, - &beta, - descr_a, - nnz_b, - b_v, - b_i, - b_j, - descr_a, - c_v, - c_i, - c_j, - *buffer_add); + cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, *buffer_add); } /** @@ -585,6 +538,8 @@ namespace ReSolve updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + mem_.deleteOnDevice(buffer_add); + return 0; } From 0dbd91afb27c962e2342559d9a50ff82eb2f2a78 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 09:14:46 -0500 Subject: [PATCH 24/47] fix nullptr check Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandler.cpp | 2 +- resolve/matrix/MatrixHandlerCuda.cpp | 9 ++++++--- tests/unit/matrix/runMatrixHandlerTests.cpp | 5 ++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index fa2dd6c38..868715097 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -361,7 +361,7 @@ namespace ReSolve int MatrixHandler::scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace) { assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix is assumed to be in CSR format.\n"); - assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); + assert((A->getValues(memspace) != nullptr || A->getNnz() == 0) && "Matrix values are null!\n"); assert(A->getNumRows() == A->getNumColumns() && "Matrix must be square.\n"); using namespace ReSolve::memory; switch (memspace) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 73e6cb1b1..afc2135d6 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -435,12 +435,14 @@ namespace ReSolve mem_.allocateArrayOnDevice(&c_i, n + 1); // calculates sum buffer - cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); + cusparseStatus_t info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); + assert(info == CUSPARSE_STATUS_SUCCESS); mem_.allocateBufferOnDevice(buffer_add, buffer_byte_size_add); // determines sum row offsets and total number of nonzeros - cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, c_i, &nnz_total, *buffer_add); + info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, c_i, &nnz_total, *buffer_add); + assert(info == CUSPARSE_STATUS_SUCCESS); C->setNnz(nnz_total); C->allocateMatrixData(memory::DEVICE); @@ -479,7 +481,8 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, *buffer_add); + cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, *buffer_add); + assert(info == CUSPARSE_STATUS_SUCCESS); } /** diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 963de6583..47a3cd075 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -77,12 +77,15 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); workspace.resetLinAlgWorkspace(); +#if !defined(RESOLVE_USE_HIP) result += test.scaleAddI(100); -#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); +#if !defined(RESOLVE_USE_CUDA) result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); result += test.scaleAddIZero(100); + workspace.resetLinAlgWorkspace(); +#endif #endif std::cout << "\n"; } From e98fca6245e7af0f6c5befb82cdfe67be896ceab Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 09:24:46 -0500 Subject: [PATCH 25/47] pointed at wrong matrix Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index afc2135d6..7fb556447 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -409,9 +409,9 @@ namespace ReSolve auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); - auto b_v = A->getValues(memory::DEVICE); - auto b_i = A->getRowData(memory::DEVICE); - auto b_j = A->getColData(memory::DEVICE); + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); size_t buffer_byte_size_add; index_type nnz_total; @@ -457,9 +457,9 @@ namespace ReSolve auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); - auto b_v = A->getValues(memory::DEVICE); - auto b_i = A->getRowData(memory::DEVICE); - auto b_j = A->getColData(memory::DEVICE); + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); auto c_v = C->getValues(memory::DEVICE); auto c_i = C->getRowData(memory::DEVICE); From 1b6082365a64c4d0a710e3738b71304f4770d367 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 20:32:35 -0500 Subject: [PATCH 26/47] Simpler test found missing boolean flag Signed-off-by: Steven Hahn --- tests/unit/matrix/runMatrixHandlerTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 47a3cd075..e85ba4687 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -80,7 +80,7 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result #if !defined(RESOLVE_USE_HIP) result += test.scaleAddI(100); workspace.resetLinAlgWorkspace(); -#if !defined(RESOLVE_USE_CUDA) +#if 0 result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); result += test.scaleAddIZero(100); From 1327d9d8fc6026bf47613fb5e9028f63573ab9ff Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 17 Nov 2025 15:57:36 -0500 Subject: [PATCH 27/47] CUDA implementation not working! Signed-off-by: Steven Hahn --- tests/unit/matrix/runMatrixHandlerTests.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index e85ba4687..70dc2eb1f 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -79,6 +79,7 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result workspace.resetLinAlgWorkspace(); #if !defined(RESOLVE_USE_HIP) result += test.scaleAddI(100); +#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) workspace.resetLinAlgWorkspace(); #if 0 result += test.scaleAddB(100); From 0b490d7241fe0ad3acd42f9ee72d26ab800ad22a Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 11:45:03 -0500 Subject: [PATCH 28/47] working? Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 6 +++++- tests/unit/matrix/runMatrixHandlerTests.cpp | 5 ++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 7fb556447..8aba55a29 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -447,7 +447,6 @@ namespace ReSolve C->setNnz(nnz_total); C->allocateMatrixData(memory::DEVICE); mem_.copyArrayDeviceToDevice(C->getRowData(memory::DEVICE), c_i, n + 1); - C->setUpdated(memory::DEVICE); mem_.deleteOnDevice(c_i); } @@ -483,6 +482,7 @@ namespace ReSolve cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, *buffer_add); assert(info == CUSPARSE_STATUS_SUCCESS); + C->setUpdated(memory::DEVICE); } /** @@ -501,6 +501,10 @@ namespace ReSolve { return 1; } + if (A->destroyMatrixData(memory::HOST) != 0) + { + return 1; + } return A->copyDataFrom(rowData, columnData, valData, nnz, memory::DEVICE, memory::DEVICE); } diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 70dc2eb1f..0de2bbe79 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -79,13 +79,12 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result workspace.resetLinAlgWorkspace(); #if !defined(RESOLVE_USE_HIP) result += test.scaleAddI(100); -#if !defined(RESOLVE_USE_CUDA) && !defined(RESOLVE_USE_HIP) + workspace.resetLinAlgWorkspace(); + result += test.scaleAddIZero(100); workspace.resetLinAlgWorkspace(); #if 0 result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); - result += test.scaleAddIZero(100); - workspace.resetLinAlgWorkspace(); #endif #endif std::cout << "\n"; From c9432cbb4b1edc922e91d907a9f76cc62dee3114 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 12:40:06 -0500 Subject: [PATCH 29/47] working? Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 24 +++++++++++++++------ tests/unit/matrix/runMatrixHandlerTests.cpp | 2 -- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 8aba55a29..c9601413c 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -530,7 +530,7 @@ namespace ReSolve matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); - + cusparseMatDescr_t descr_a; cusparseCreateMatDescr(&descr_a); cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); @@ -546,7 +546,6 @@ namespace ReSolve updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); mem_.deleteOnDevice(buffer_add); - return 0; } @@ -560,11 +559,22 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - (void) A; - (void) alpha; - (void) B; - // NOT IMPLEMENTED - return 1; + cusparseMatDescr_t descr_a; + cusparseCreateMatDescr(&descr_a); + cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); + + void* buffer_add{nullptr}; + + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + allocateForSum(A, alpha, B, 1., &C, descr_a, &buffer_add); + + compute_sum(A, alpha, B, 1., &C, descr_a, &buffer_add); + + updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + + mem_.deleteOnDevice(buffer_add); + return 0; } } // namespace ReSolve diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 0de2bbe79..99ef35a40 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -82,10 +82,8 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result workspace.resetLinAlgWorkspace(); result += test.scaleAddIZero(100); workspace.resetLinAlgWorkspace(); -#if 0 result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); -#endif #endif std::cout << "\n"; } From 47002b2b9ccbb23e92e4f3142af703dabd02cb31 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 16:27:37 -0500 Subject: [PATCH 30/47] checkpoint Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 12 +-- resolve/matrix/MatrixHandlerCuda.cpp | 68 +++++-------- resolve/matrix/MatrixHandlerCuda.hpp | 4 +- resolve/workspace/LinAlgWorkspaceCUDA.cpp | 111 +++++++++++++++++++--- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 37 ++++++-- resolve/workspace/LinAlgWorkspaceCpu.cpp | 22 ++--- resolve/workspace/LinAlgWorkspaceCpu.hpp | 16 ++-- 7 files changed, 182 insertions(+), 88 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index cd1823179..499a8fe3c 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -435,7 +435,7 @@ namespace ReSolve * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ - static int addIWithPattern(matrix::Csr* A, ScaleAddBuffer* pattern) + static int addIWithPattern(matrix::Csr* A, ScaleAddBufferCpu* pattern) { std::vector newValues(pattern->getNnz()); @@ -510,7 +510,7 @@ namespace ReSolve // Reuse sparsity pattern if it is available if (workspace_->scaleAddISetup()) { - ScaleAddBuffer* pattern = workspace_->getScaleAddIBuffer(); + ScaleAddBufferCpu* pattern = workspace_->getScaleAddIBuffer(); return addIWithPattern(A, pattern); } @@ -575,7 +575,7 @@ namespace ReSolve newRowPointers[A->getNumRows()] = newNnzCount; assert(newNnzCount <= maxNnzCount); newColumnIndices.resize(newNnzCount); - auto pattern = new ScaleAddBuffer(std::move(newRowPointers), std::move(newColumnIndices)); + auto pattern = new ScaleAddBufferCpu(std::move(newRowPointers), std::move(newColumnIndices)); // workspace_ owns pattern workspace_->setScaleAddIBuffer(pattern); updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); @@ -590,7 +590,7 @@ namespace ReSolve * @param[in] pattern - precalculated sparsity pattern * @return 0 if successful, 1 otherwise */ - static int addBWithPattern(matrix::Csr* A, matrix::Csr* B, ScaleAddBuffer* pattern) + static int addBWithPattern(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferCpu* pattern) { index_type const* const aRowPointers = A->getRowData(memory::HOST); index_type const* const aColIndices = A->getColData(memory::HOST); @@ -675,7 +675,7 @@ namespace ReSolve // Reuse sparsity pattern if it is available if (workspace_->scaleAddBSetup()) { - ScaleAddBuffer* pattern = workspace_->getScaleAddBBuffer(); + ScaleAddBufferCpu* pattern = workspace_->getScaleAddBBuffer(); return addBWithPattern(A, B, pattern); } @@ -749,7 +749,7 @@ namespace ReSolve newRowPointers.push_back(static_cast(newValues.size())); } - auto pattern = new ScaleAddBuffer(std::move(newRowPointers), std::move(newColIndices)); + auto pattern = new ScaleAddBufferCpu(std::move(newRowPointers), std::move(newColIndices)); // workspace_ owns pattern workspace_->setScaleAddBBuffer(pattern); return updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index c9601413c..f0995110d 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -402,9 +402,18 @@ namespace ReSolve return 0; } - void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, - real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add) + void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C) { + auto handle = workspace_->getCusparseHandle(); + cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + workspace_->setCusparseHandle(handle); + + cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); + cusparseCreateMatDescr(&descr_a); + cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); + workspace_->setScaleAddMatrixDescriptor(descr_a); + auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); @@ -413,12 +422,6 @@ namespace ReSolve auto b_i = B->getRowData(memory::DEVICE); auto b_j = B->getColData(memory::DEVICE); - size_t buffer_byte_size_add; - index_type nnz_total; - - auto handle = workspace_->getCusparseHandle(); - cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); - index_type m = A->getNumRows(); assert(m == B->getNumRows()); index_type n = A->getNumColumns(); @@ -433,24 +436,25 @@ namespace ReSolve index_type* c_i = nullptr; index_type* c_j = nullptr; - mem_.allocateArrayOnDevice(&c_i, n + 1); + size_t buffer_byte_size_add; // calculates sum buffer cusparseStatus_t info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); assert(info == CUSPARSE_STATUS_SUCCESS); - mem_.allocateBufferOnDevice(buffer_add, buffer_byte_size_add); + auto buffer = new ScaleAddBufferCUDA(n + 1, buffer_byte_size_add); + workspace_->setScaleAddBBuffer(buffer); + index_type nnz_total; // determines sum row offsets and total number of nonzeros - info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, c_i, &nnz_total, *buffer_add); + info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, buffer->getRowData(), &nnz_total, buffer->getBuffer()); assert(info == CUSPARSE_STATUS_SUCCESS); C->setNnz(nnz_total); C->allocateMatrixData(memory::DEVICE); - mem_.copyArrayDeviceToDevice(C->getRowData(memory::DEVICE), c_i, n + 1); - mem_.deleteOnDevice(c_i); + mem_.copyArrayDeviceToDevice(C->getRowData(memory::DEVICE), buffer->getRowData(), n + 1); } - void MatrixHandlerCuda::compute_sum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add) + void MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C) { auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); @@ -465,7 +469,6 @@ namespace ReSolve auto c_j = C->getColData(memory::DEVICE); auto handle = workspace_->getCusparseHandle(); - cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); index_type m = A->getNumRows(); assert(m == B->getNumRows()); @@ -480,7 +483,10 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, *buffer_add); + ScaleAddBufferCUDA* buffer = workspace_->getScaleAddBBuffer(); + mem_.copyArrayDeviceToDevice(c_i, buffer->getRowData(), n + 1); + cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); + cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, buffer->getBuffer()); assert(info == CUSPARSE_STATUS_SUCCESS); C->setUpdated(memory::DEVICE); } @@ -531,22 +537,7 @@ namespace ReSolve matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); - cusparseMatDescr_t descr_a; - cusparseCreateMatDescr(&descr_a); - cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); - - void* buffer_add{nullptr}; - - matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - allocateForSum(A, alpha, &I, 1., &C, descr_a, &buffer_add); - - compute_sum(A, alpha, &I, 1., &C, descr_a, &buffer_add); - - updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); - - mem_.deleteOnDevice(buffer_add); - return 0; + return scaleAddB(A, alpha, &I); } /** @@ -559,21 +550,12 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - cusparseMatDescr_t descr_a; - cusparseCreateMatDescr(&descr_a); - cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); - - void* buffer_add{nullptr}; - matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - allocateForSum(A, alpha, B, 1., &C, descr_a, &buffer_add); + allocateForSum(A, alpha, B, 1., &C); - compute_sum(A, alpha, B, 1., &C, descr_a, &buffer_add); + computeSum(A, alpha, B, 1., &C); updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); - - mem_.deleteOnDevice(buffer_add); return 0; } diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index b1051cd7b..4a98a0a26 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -62,8 +62,8 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: - void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add); - void compute_sum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, cusparseMatDescr_t& descr_a, void** buffer_add); + void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C); + void computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C); LinAlgWorkspaceCUDA* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index f545cdc75..8ab64df81 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -2,13 +2,80 @@ namespace ReSolve { + + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) + * @param[in] nrows - number of rows + * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] nnz - number of non-zeros + */ + ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows, size_t bufferSize) + : numRows_(numRows), bufferSize_(bufferSize) + { + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + } + + /** + * @brief Destructor + * + */ + ScaleAddBufferCUDA::~ScaleAddBufferCUDA() + { + mem_.deleteOnDevice(rowData_); + mem_.deleteOnDevice(buffer_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferCUDA::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + void* ScaleAddBufferCUDA::getBuffer() + { + return buffer_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferCUDA::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferCUDA::getNnz() + { + return rowData_[numRows_]; + } + LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() { handle_cusolversp_ = nullptr; handle_cusparse_ = nullptr; handle_cublas_ = nullptr; buffer_spmv_ = nullptr; - buffer_scale_add_ = nullptr; + buffer_scale_add_i = nullptr; + buffer_scale_add_b = nullptr; buffer_1norm_ = nullptr; transpose_workspace_ = nullptr; transpose_workspace_ready_ = false; @@ -22,8 +89,10 @@ namespace ReSolve { if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_); - if (buffer_scale_add_ != nullptr) - mem_.deleteOnDevice(buffer_scale_add_); + if (buffer_scale_add_i != nullptr) + mem_.deleteOnDevice(buffer_scale_add_i); + if (buffer_scale_add_b != nullptr) + mem_.deleteOnDevice(buffer_scale_add_b); if (d_r_size_ != 0) mem_.deleteOnDevice(d_r_); if (norm_buffer_ready_) @@ -86,9 +155,14 @@ namespace ReSolve return buffer_1norm_; } - void* LinAlgWorkspaceCUDA::getScaleAddBuffer() + ScaleAddBufferCUDA* LinAlgWorkspaceCUDA::getScaleAddIBuffer() { - return buffer_scale_add_; + return buffer_scale_add_i; + } + + ScaleAddBufferCUDA* LinAlgWorkspaceCUDA::getScaleAddBBuffer() + { + return buffer_scale_add_b; } void* LinAlgWorkspaceCUDA::getTransposeBufferWorkspace() @@ -122,9 +196,24 @@ namespace ReSolve buffer_spmv_ = buffer; } - void LinAlgWorkspaceCUDA::setScaleAddBuffer(void* buffer) + void LinAlgWorkspaceCUDA::setScaleAddBBuffer(ScaleAddBufferCUDA* buffer) + { + buffer_scale_add_b = buffer; + } + + void LinAlgWorkspaceCUDA::setScaleAddIBuffer(ScaleAddBufferCUDA* buffer) + { + buffer_scale_add_i = buffer; + } + + void LinAlgWorkspaceCUDA::scaleAddBSetupDone() + { + scale_add_b_setup_done_ = true; + } + + void LinAlgWorkspaceCUDA::scaleAddISetupDone() { - buffer_scale_add_ = buffer; + scale_add_i_setup_done_ = true; } void LinAlgWorkspaceCUDA::setNormBuffer(void* buffer) @@ -227,14 +316,14 @@ namespace ReSolve matvec_setup_done_ = true; } - bool LinAlgWorkspaceCUDA::scaleAddSetup() + bool LinAlgWorkspaceCUDA::scaleAddISetup() { - return scale_add_setup_done_; + return scale_add_i_setup_done_; } - void LinAlgWorkspaceCUDA::scaleAddSetupDone() + bool LinAlgWorkspaceCUDA::scaleAddBSetup() { - scale_add_setup_done_ = true; + return scale_add_b_setup_done_; } void LinAlgWorkspaceCUDA::initializeHandles() diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index f566b50c8..d34b561e1 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -8,6 +8,25 @@ namespace ReSolve { + + class ScaleAddBufferCUDA + { + public: + ScaleAddBufferCUDA(index_type numRows, size_t bufferSize); + ~ScaleAddBufferCUDA(); + index_type* getRowData(); + void* getBuffer(); + index_type getNumRows(); + index_type getNnz(); + + private: + index_type* rowData_; + void* buffer_; + index_type numRows_; + size_t bufferSize_; + MemoryHandler mem_; + }; + class LinAlgWorkspaceCUDA { public: @@ -19,13 +38,17 @@ namespace ReSolve // accessors void* getSpmvBuffer(); void* getNormBuffer(); - void* getScaleAddBuffer(); + ScaleAddBufferCUDA* getScaleAddIBuffer(); + ScaleAddBufferCUDA* getScaleAddBBuffer(); void* getTransposeBufferWorkspace(); void setTransposeBufferWorkspace(size_t bufferSize); bool isTransposeBufferAllocated(); void setSpmvBuffer(void* buffer); void setNormBuffer(void* buffer); - void setScaleAddBuffer(void* buffer); + void setScaleAddIBuffer(ScaleAddBufferCUDA* buffer); + void setScaleAddBBuffer(ScaleAddBufferCUDA* buffer); + void scaleAddISetupDone(); + void scaleAddBSetupDone(); cublasHandle_t getCublasHandle(); cusolverSpHandle_t getCusolverSpHandle(); // needed for 1-norms etc @@ -52,8 +75,8 @@ namespace ReSolve bool matvecSetup(); void matvecSetupDone(); - bool scaleAddSetup(); - void scaleAddSetupDone(); + bool scaleAddISetup(); + bool scaleAddBSetup(); private: // handles @@ -72,10 +95,12 @@ namespace ReSolve // buffers void* buffer_spmv_{nullptr}; void* buffer_1norm_{nullptr}; - void* buffer_scale_add_{nullptr}; + ScaleAddBufferCUDA* buffer_scale_add_i{nullptr}; + ScaleAddBufferCUDA* buffer_scale_add_b{nullptr}; bool matvec_setup_done_{false}; // check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. - bool scale_add_setup_done_{false}; + bool scale_add_i_setup_done_{false}; + bool scale_add_b_setup_done_{false}; void* transpose_workspace_{nullptr}; // needed for transpose bool transpose_workspace_ready_{false}; // to track if allocated diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index d80e6264f..a7ee62ca4 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -10,11 +10,9 @@ namespace ReSolve * @brief Store sparsity pattern * * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) - * @param[in] nrows - number of rows * @param[in] col_data - pointer to column data (array of integers, length: nnz) - * @param[in] nnz - number of non-zeros */ - ScaleAddBuffer::ScaleAddBuffer(std::vector rowData, std::vector colData) + ScaleAddBufferCpu::ScaleAddBufferCpu(std::vector rowData, std::vector colData) : rowData_(std::move(rowData)), colData_(std::move(colData)) { } @@ -24,7 +22,7 @@ namespace ReSolve * * @return precalculated row pointers */ - index_type* ScaleAddBuffer::getRowData() + index_type* ScaleAddBufferCpu::getRowData() { return rowData_.data(); } @@ -34,7 +32,7 @@ namespace ReSolve * * @return precalculated column indices */ - index_type* ScaleAddBuffer::getColumnData() + index_type* ScaleAddBufferCpu::getColumnData() { return colData_.data(); } @@ -44,7 +42,7 @@ namespace ReSolve * * @return number of matrix rows. */ - index_type ScaleAddBuffer::getNumRows() + index_type ScaleAddBufferCpu::getNumRows() { return static_cast(rowData_.size()) - 1; } @@ -54,7 +52,7 @@ namespace ReSolve * * @return number of matrix columns. */ - index_type ScaleAddBuffer::getNumColumns() + index_type ScaleAddBufferCpu::getNumColumns() { return getNumRows(); } @@ -64,7 +62,7 @@ namespace ReSolve * * @return number of non-zeros */ - index_type ScaleAddBuffer::getNnz() + index_type ScaleAddBufferCpu::getNnz() { return static_cast(colData_.size()); } @@ -103,13 +101,13 @@ namespace ReSolve scaleAddISetupDone_ = true; } - ScaleAddBuffer* LinAlgWorkspaceCpu::getScaleAddIBuffer() + ScaleAddBufferCpu* LinAlgWorkspaceCpu::getScaleAddIBuffer() { assert(scaleAddIBuffer_ != nullptr); return scaleAddIBuffer_; } - void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddBuffer* buffer) + void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddBufferCpu* buffer) { assert(buffer != nullptr); assert(scaleAddIBuffer_ == nullptr); @@ -127,13 +125,13 @@ namespace ReSolve scaleAddBSetupDone_ = true; } - ScaleAddBuffer* LinAlgWorkspaceCpu::getScaleAddBBuffer() + ScaleAddBufferCpu* LinAlgWorkspaceCpu::getScaleAddBBuffer() { assert(scaleAddBBuffer_ != nullptr); return scaleAddBBuffer_; } - void LinAlgWorkspaceCpu::setScaleAddBBuffer(ScaleAddBuffer* buffer) + void LinAlgWorkspaceCpu::setScaleAddBBuffer(ScaleAddBufferCpu* buffer) { assert(scaleAddBBuffer_ == nullptr); scaleAddBBuffer_ = buffer; diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index e2045e4fc..03b87a237 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -6,10 +6,10 @@ namespace ReSolve { - class ScaleAddBuffer + class ScaleAddBufferCpu { public: - ScaleAddBuffer(std::vector rowData, std::vector colData); + ScaleAddBufferCpu(std::vector rowData, std::vector colData); index_type* getRowData(); index_type* getColumnData(); index_type getNumRows(); @@ -30,20 +30,20 @@ namespace ReSolve void resetLinAlgWorkspace(); bool scaleAddISetup(); void scaleAddISetupDone(); - ScaleAddBuffer* getScaleAddIBuffer(); - void setScaleAddIBuffer(ScaleAddBuffer* buffer); + ScaleAddBufferCpu* getScaleAddIBuffer(); + void setScaleAddIBuffer(ScaleAddBufferCpu* buffer); bool scaleAddBSetup(); void scaleAddBSetupDone(); - ScaleAddBuffer* getScaleAddBBuffer(); - void setScaleAddBBuffer(ScaleAddBuffer* buffer); + ScaleAddBufferCpu* getScaleAddBBuffer(); + void setScaleAddBBuffer(ScaleAddBufferCpu* buffer); private: // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. bool scaleAddISetupDone_{false}; - ScaleAddBuffer* scaleAddIBuffer_{nullptr}; + ScaleAddBufferCpu* scaleAddIBuffer_{nullptr}; // check if setup is done for scaleAddB i.e. if buffer is allocated, csr structure is set etc. bool scaleAddBSetupDone_{false}; - ScaleAddBuffer* scaleAddBBuffer_{nullptr}; + ScaleAddBufferCpu* scaleAddBBuffer_{nullptr}; }; } // namespace ReSolve From 319b6b63e930cd130ceffcb2a0ab99120a3f263b Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 17:18:42 -0500 Subject: [PATCH 31/47] checkpoint Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 45 ++++++++++++++--------- resolve/matrix/MatrixHandlerCuda.hpp | 7 +++- resolve/workspace/LinAlgWorkspaceCUDA.cpp | 24 +++++++++++- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 2 + 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index f0995110d..e15fd073b 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -402,7 +402,7 @@ namespace ReSolve return 0; } - void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C) + void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern) { auto handle = workspace_->getCusparseHandle(); cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); @@ -441,20 +441,16 @@ namespace ReSolve cusparseStatus_t info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); assert(info == CUSPARSE_STATUS_SUCCESS); - auto buffer = new ScaleAddBufferCUDA(n + 1, buffer_byte_size_add); - workspace_->setScaleAddBBuffer(buffer); + *pattern = new ScaleAddBufferCUDA(n + 1, buffer_byte_size_add); index_type nnz_total; // determines sum row offsets and total number of nonzeros - info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, buffer->getRowData(), &nnz_total, buffer->getBuffer()); + info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total, (*pattern)->getBuffer()); assert(info == CUSPARSE_STATUS_SUCCESS); - - C->setNnz(nnz_total); - C->allocateMatrixData(memory::DEVICE); - mem_.copyArrayDeviceToDevice(C->getRowData(memory::DEVICE), buffer->getRowData(), n + 1); + (*pattern)->setNnz(nnz_total); } - void MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C) + void MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern) { auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); @@ -483,10 +479,9 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - ScaleAddBufferCUDA* buffer = workspace_->getScaleAddBBuffer(); - mem_.copyArrayDeviceToDevice(c_i, buffer->getRowData(), n + 1); + mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); - cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, buffer->getBuffer()); + cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, pattern->getBuffer()); assert(info == CUSPARSE_STATUS_SUCCESS); C->setUpdated(memory::DEVICE); } @@ -550,12 +545,28 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - allocateForSum(A, alpha, B, 1., &C); - - computeSum(A, alpha, B, 1., &C); - updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddBSetup()) + { + ScaleAddBufferCUDA* pattern = workspace_->getScaleAddBBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + computeSum(A, alpha, B, 1., &C, pattern); + updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferCUDA* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + allocateForSum(A, alpha, B, 1., &pattern); + workspace_->setScaleAddBBuffer(pattern); + workspace_->scaleAddBSetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + computeSum(A, alpha, B, 1., &C, pattern); + updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } return 0; } diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 4a98a0a26..716a4e49f 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -8,6 +8,9 @@ namespace ReSolve { + + class ScaleAddBufferCUDA; + namespace vector { class Vector; @@ -62,8 +65,8 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: - void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C); - void computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C); + void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern); + void computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern); LinAlgWorkspaceCUDA* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index 8ab64df81..2fc833d7a 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -65,7 +65,17 @@ namespace ReSolve */ index_type ScaleAddBufferCUDA::getNnz() { - return rowData_[numRows_]; + return nnz_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + void ScaleAddBufferCUDA::setNnz(index_type nnz) + { + nnz_ = nnz; } LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() @@ -142,6 +152,18 @@ namespace ReSolve transpose_workspace_ = nullptr; transpose_workspace_ready_ = false; } + if (scale_add_b_setup_done_) + { + delete buffer_scale_add_b; + buffer_scale_add_b = nullptr; + scale_add_b_setup_done_ = false; + } + if (scale_add_i_setup_done_) + { + delete buffer_scale_add_i; + buffer_scale_add_i = nullptr; + scale_add_i_setup_done_ = false; + } return; } diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index d34b561e1..a8ec09b83 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -17,12 +17,14 @@ namespace ReSolve index_type* getRowData(); void* getBuffer(); index_type getNumRows(); + void setNnz(index_type nnz); index_type getNnz(); private: index_type* rowData_; void* buffer_; index_type numRows_; + index_type nnz_; size_t bufferSize_; MemoryHandler mem_; }; From b2f52bcf20181b14e435e92ed348e96b0c614f10 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 18 Nov 2025 20:32:58 -0500 Subject: [PATCH 32/47] set error code Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 88 ++++++++++++++++++------ resolve/matrix/MatrixHandlerCuda.hpp | 4 +- resolve/workspace/LinAlgWorkspaceCpu.cpp | 4 +- 3 files changed, 71 insertions(+), 25 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index e15fd073b..9de47d1f0 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -402,10 +402,22 @@ namespace ReSolve return 0; } - void MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern) + /** + * @brief Calculate buffer size and sparsity pattern of alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern) { auto handle = workspace_->getCusparseHandle(); - cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + cusparseStatus_t cu_info = cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + int info = (cu_info != CUSPARSE_STATUS_SUCCESS); workspace_->setCusparseHandle(handle); cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); @@ -438,19 +450,31 @@ namespace ReSolve size_t buffer_byte_size_add; // calculates sum buffer - cusparseStatus_t info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); - assert(info == CUSPARSE_STATUS_SUCCESS); + cu_info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); *pattern = new ScaleAddBufferCUDA(n + 1, buffer_byte_size_add); index_type nnz_total; // determines sum row offsets and total number of nonzeros - info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total, (*pattern)->getBuffer()); - assert(info == CUSPARSE_STATUS_SUCCESS); + cu_info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total, (*pattern)->getBuffer()); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); (*pattern)->setNnz(nnz_total); + return 0; } - void MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern) + /** + * @brief Given sparsity pattern, calculate alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern) { auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); @@ -479,11 +503,12 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); - cusparseStatus_t info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, pattern->getBuffer()); - assert(info == CUSPARSE_STATUS_SUCCESS); - C->setUpdated(memory::DEVICE); + cusparseStatus_t cu_info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, pattern->getBuffer()); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); + info = info || C->setUpdated(memory::DEVICE); + return info; } /** @@ -530,9 +555,30 @@ namespace ReSolve std::vector I_v(n, 1.0); matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); - I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); + int info = I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); - return scaleAddB(A, alpha, &I); + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddBufferCUDA* pattern = workspace_->getScaleAddIBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferCUDA* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, alpha, &I, 1., &pattern); + workspace_->setScaleAddIBuffer(pattern); + workspace_->scaleAddISetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; } /** @@ -545,29 +591,29 @@ namespace ReSolve */ int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - + int info = 0; // Reuse sparsity pattern if it is available if (workspace_->scaleAddBSetup()) { ScaleAddBufferCUDA* pattern = workspace_->getScaleAddBBuffer(); matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); - C.allocateMatrixData(memory::DEVICE); - computeSum(A, alpha, B, 1., &C, pattern); - updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); } else { ScaleAddBufferCUDA* pattern = nullptr; matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - allocateForSum(A, alpha, B, 1., &pattern); + info = info || allocateForSum(A, alpha, B, 1., &pattern); workspace_->setScaleAddBBuffer(pattern); workspace_->scaleAddBSetupDone(); C.setNnz(pattern->getNnz()); C.allocateMatrixData(memory::DEVICE); - computeSum(A, alpha, B, 1., &C, pattern); - updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); } - return 0; + return info; } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 716a4e49f..7441ba1e4 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -65,8 +65,8 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: - void allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern); - void computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern); + int allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern); + int computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern); LinAlgWorkspaceCUDA* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index a7ee62ca4..fede4a1cd 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -9,8 +9,8 @@ namespace ReSolve /** * @brief Store sparsity pattern * - * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) - * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] row_data - row data (array of integers, length:nrows+1) + * @param[in] col_data - column data (array of integers, length: nnz) */ ScaleAddBufferCpu::ScaleAddBufferCpu(std::vector rowData, std::vector colData) : rowData_(std::move(rowData)), colData_(std::move(colData)) From b54dd5ecc5efa2fef6a578d3b15925acfce72fb8 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Wed, 19 Nov 2025 01:37:20 +0000 Subject: [PATCH 33/47] Apply pre-commmit fixes --- resolve/matrix/MatrixHandlerCuda.cpp | 4 ++-- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 18 +++++++++--------- resolve/workspace/LinAlgWorkspaceCpu.hpp | 16 ++++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 9de47d1f0..bb4d10cf8 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -415,7 +415,7 @@ namespace ReSolve */ int MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern) { - auto handle = workspace_->getCusparseHandle(); + auto handle = workspace_->getCusparseHandle(); cusparseStatus_t cu_info = cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); int info = (cu_info != CUSPARSE_STATUS_SUCCESS); workspace_->setCusparseHandle(handle); @@ -552,7 +552,7 @@ namespace ReSolve std::vector I_j(n); std::iota(I_j.begin(), I_j.end(), 0); - std::vector I_v(n, 1.0); + std::vector I_v(n, 1.0); matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); int info = I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index a8ec09b83..1bff1eaf1 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -38,15 +38,15 @@ namespace ReSolve void resetLinAlgWorkspace(); // accessors - void* getSpmvBuffer(); - void* getNormBuffer(); + void* getSpmvBuffer(); + void* getNormBuffer(); ScaleAddBufferCUDA* getScaleAddIBuffer(); ScaleAddBufferCUDA* getScaleAddBBuffer(); - void* getTransposeBufferWorkspace(); - void setTransposeBufferWorkspace(size_t bufferSize); - bool isTransposeBufferAllocated(); - void setSpmvBuffer(void* buffer); - void setNormBuffer(void* buffer); + void* getTransposeBufferWorkspace(); + void setTransposeBufferWorkspace(size_t bufferSize); + bool isTransposeBufferAllocated(); + void setSpmvBuffer(void* buffer); + void setNormBuffer(void* buffer); void setScaleAddIBuffer(ScaleAddBufferCUDA* buffer); void setScaleAddBBuffer(ScaleAddBufferCUDA* buffer); void scaleAddISetupDone(); @@ -95,8 +95,8 @@ namespace ReSolve cusparseDnVecDescr_t vec_y_; // buffers - void* buffer_spmv_{nullptr}; - void* buffer_1norm_{nullptr}; + void* buffer_spmv_{nullptr}; + void* buffer_1norm_{nullptr}; ScaleAddBufferCUDA* buffer_scale_add_i{nullptr}; ScaleAddBufferCUDA* buffer_scale_add_b{nullptr}; diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 03b87a237..edd7d3374 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -26,23 +26,23 @@ namespace ReSolve public: LinAlgWorkspaceCpu(); ~LinAlgWorkspaceCpu(); - void initializeHandles(); - void resetLinAlgWorkspace(); - bool scaleAddISetup(); - void scaleAddISetupDone(); + void initializeHandles(); + void resetLinAlgWorkspace(); + bool scaleAddISetup(); + void scaleAddISetupDone(); ScaleAddBufferCpu* getScaleAddIBuffer(); void setScaleAddIBuffer(ScaleAddBufferCpu* buffer); - bool scaleAddBSetup(); - void scaleAddBSetupDone(); + bool scaleAddBSetup(); + void scaleAddBSetupDone(); ScaleAddBufferCpu* getScaleAddBBuffer(); void setScaleAddBBuffer(ScaleAddBufferCpu* buffer); private: // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. - bool scaleAddISetupDone_{false}; + bool scaleAddISetupDone_{false}; ScaleAddBufferCpu* scaleAddIBuffer_{nullptr}; // check if setup is done for scaleAddB i.e. if buffer is allocated, csr structure is set etc. - bool scaleAddBSetupDone_{false}; + bool scaleAddBSetupDone_{false}; ScaleAddBufferCpu* scaleAddBBuffer_{nullptr}; }; From 8396551efe6df9e2ff1ed86791ac518a4cf4cf0b Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Wed, 19 Nov 2025 12:26:03 -0500 Subject: [PATCH 34/47] Update memory deletion for scale add buffers pointers are on host, not device --- resolve/workspace/LinAlgWorkspaceCUDA.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index 2fc833d7a..8e59e9acd 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -100,9 +100,9 @@ namespace ReSolve if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_); if (buffer_scale_add_i != nullptr) - mem_.deleteOnDevice(buffer_scale_add_i); + delete buffer_scale_add_i; if (buffer_scale_add_b != nullptr) - mem_.deleteOnDevice(buffer_scale_add_b); + delete buffer_scale_add_b; if (d_r_size_ != 0) mem_.deleteOnDevice(d_r_); if (norm_buffer_ready_) From cf0d39471b0f71dd4bc2948b250a33d2121c27a9 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Wed, 19 Nov 2025 12:27:52 -0500 Subject: [PATCH 35/47] checkpoint Signed-off-by: Steven Hahn --- resolve/workspace/LinAlgWorkspaceHIP.cpp | 117 ++++++++++++++++++++++- resolve/workspace/LinAlgWorkspaceHIP.hpp | 38 ++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index fc13e6896..427d0a1ad 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -2,11 +2,118 @@ namespace ReSolve { + + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) + * @param[in] nrows - number of rows + * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] nnz - number of non-zeros + */ + ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows, size_t bufferSize) + : numRows_(numRows), bufferSize_(bufferSize) + { + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + } + + /** + * @brief Destructor + * + */ + ScaleAddBufferHIP::~ScaleAddBufferHIP() + { + mem_.deleteOnDevice(rowData_); + mem_.deleteOnDevice(buffer_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferHIP::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + void* ScaleAddBufferHIP::getBuffer() + { + return buffer_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferHIP::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferHIP::getNnz() + { + return nnz_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + void ScaleAddBufferHIP::setNnz(index_type nnz) + { + nnz_ = nnz; + } + + // Create a shortcut name for Logger static class + using out = io::Logger; + + /** + * @brief Empty constructor for MatrixHandlerHip object + */ + MatrixHandlerHip::~MatrixHandlerHip() + { + } + + /** + * @brief Constructor for MatrixHandlerHip object + * + * @param[in] new_workspace - pointer to the workspace object + */ + MatrixHandlerHip::MatrixHandlerHip(LinAlgWorkspaceHIP* new_workspace) + { + workspace_ = new_workspace; + } + + /** + * @brief Set values changed flag + * + * @param[in] values_changed - flag indicating if values have changed + */ + void MatrixHandlerHip::setValuesChanged(bool values_changed) + { + values_changed_ = values_changed; + } + LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { handle_rocsparse_ = nullptr; handle_rocblas_ = nullptr; - + buffer_scale_add_i = nullptr; + buffer_scale_add_b = nullptr; matvec_setup_done_ = false; d_r_ = nullptr; d_r_size_ = 0; @@ -24,6 +131,14 @@ namespace ReSolve { rocsparse_destroy_mat_descr(mat_A_); } + if (buffer_scale_add_i != nullptr) + { + delete buffer_scale_add_i; + } + if (buffer_scale_add_b != nullptr) + { + delete buffer_scale_add_b; + } if (d_r_size_ != 0) { mem_.deleteOnDevice(d_r_); diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 4db4b89fa..866076224 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -9,6 +9,27 @@ namespace ReSolve { + + class ScaleAddBufferHIP + { + public: + ScaleAddBufferHIP(index_type numRows, size_t bufferSize); + ~ScaleAddBufferHIP(); + index_type* getRowData(); + void* getBuffer(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); + + private: + index_type* rowData_; + void* buffer_; + index_type numRows_; + index_type nnz_; + size_t bufferSize_; + MemoryHandler mem_; + }; + class LinAlgWorkspaceHIP { public: @@ -20,24 +41,37 @@ namespace ReSolve rocblas_handle getRocblasHandle(); rocsparse_handle getRocsparseHandle(); rocsparse_mat_descr getSpmvMatrixDescriptor(); + rocsparse_mat_descr getScaleAddMatrixDescriptor(); rocsparse_mat_info getSpmvMatrixInfo(); index_type getDrSize(); real_type* getDr(); real_type* getNormBuffer(); + void setScaleAddIBuffer(ScaleAddBufferHIP* buffer); + void setScaleAddBBuffer(ScaleAddBufferHIP* buffer); + ScaleAddBufferHIP* getScaleAddIBuffer(); + ScaleAddBufferHIP* getScaleAddBBuffer(); void* getTransposeBufferWorkspace(); void setTransposeBufferWorkspace(size_t bufferSize); bool getNormBufferState(); bool isTransposeBufferAllocated(); + void scaleAddISetupDone(); + void scaleAddBSetupDone(); void setRocblasHandle(rocblas_handle handle); void setRocsparseHandle(rocsparse_handle handle); void setSpmvMatrixDescriptor(rocsparse_mat_descr mat); + void setScaleAddMatrixDescriptor(rocsparse_mat_descr mat); + void setSpmvMatrixInfo(rocsparse_mat_info info); void initializeHandles(); bool matvecSetup(); void matvecSetupDone(); + bool scaleAddISetup(); + bool scaleAddBSetup(); + void scaleAddISetupDone(); + void scaleAddBSetupDone(); void setDrSize(index_type new_sz); void setDr(real_type* new_dr); @@ -57,6 +91,8 @@ namespace ReSolve // buffers // there is no buffer needed in matvec bool matvec_setup_done_{false}; // check if setup is done for matvec (note: no buffer but there is analysis) + bool scale_add_i_setup_done_{false}; + bool scale_add_b_setup_done_{false}; // info - but we need info rocsparse_mat_info info_A_; @@ -67,6 +103,8 @@ namespace ReSolve bool transpose_workspace_ready_{false}; // to track if allocated index_type d_r_size_{0}; bool norm_buffer_ready_{false}; // to track if allocated + ScaleAddBufferHIP* buffer_scale_add_i{nullptr}; + ScaleAddBufferHIP* buffer_scale_add_b{nullptr}; MemoryHandler mem_; ///< Memory handler not needed for now }; From 6bc7d7b72ff862c6290a53a15948b122fb05ce9d Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 10:29:43 -0500 Subject: [PATCH 36/47] fix build Signed-off-by: Steven Hahn --- resolve/LinSolverDirectRocSolverRf.cpp | 2 ++ resolve/matrix/MatrixHandlerCpu.hpp | 2 +- resolve/matrix/MatrixHandlerHip.cpp | 1 + resolve/workspace/LinAlgWorkspaceHIP.cpp | 30 ------------------------ resolve/workspace/LinAlgWorkspaceHIP.hpp | 2 -- 5 files changed, 4 insertions(+), 33 deletions(-) diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 4f0ebc50e..4f445db66 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -1,5 +1,7 @@ #include "LinSolverDirectRocSolverRf.hpp" +#include + #include #include diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index a6dc5f259..640fcd447 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -48,7 +48,7 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha) override; - int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B); + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B); override; virtual int matvec(matrix::Sparse* A, vector_type* vec_x, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 3011ea17e..ab3e8e15f 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,6 +1,7 @@ #include "MatrixHandlerHip.hpp" #include +#include #include #include diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 427d0a1ad..ea4fcff5b 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -78,36 +78,6 @@ namespace ReSolve nnz_ = nnz; } - // Create a shortcut name for Logger static class - using out = io::Logger; - - /** - * @brief Empty constructor for MatrixHandlerHip object - */ - MatrixHandlerHip::~MatrixHandlerHip() - { - } - - /** - * @brief Constructor for MatrixHandlerHip object - * - * @param[in] new_workspace - pointer to the workspace object - */ - MatrixHandlerHip::MatrixHandlerHip(LinAlgWorkspaceHIP* new_workspace) - { - workspace_ = new_workspace; - } - - /** - * @brief Set values changed flag - * - * @param[in] values_changed - flag indicating if values have changed - */ - void MatrixHandlerHip::setValuesChanged(bool values_changed) - { - values_changed_ = values_changed; - } - LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { handle_rocsparse_ = nullptr; diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 866076224..aa6e906d1 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -54,8 +54,6 @@ namespace ReSolve void setTransposeBufferWorkspace(size_t bufferSize); bool getNormBufferState(); bool isTransposeBufferAllocated(); - void scaleAddISetupDone(); - void scaleAddBSetupDone(); void setRocblasHandle(rocblas_handle handle); void setRocsparseHandle(rocsparse_handle handle); From ef71cdb2506d8f0b9da562a09c2be7a5787b6d73 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 11:58:15 -0500 Subject: [PATCH 37/47] working? Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.hpp | 2 +- resolve/matrix/MatrixHandlerHip.cpp | 195 +++++++++++++++++++- resolve/matrix/MatrixHandlerHip.hpp | 4 + resolve/workspace/LinAlgWorkspaceHIP.cpp | 81 +++++++- resolve/workspace/LinAlgWorkspaceHIP.hpp | 6 +- tests/unit/matrix/runMatrixHandlerTests.cpp | 2 - 6 files changed, 269 insertions(+), 21 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 640fcd447..6fcb8fc3c 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -48,7 +48,7 @@ namespace ReSolve int scaleAddI(matrix::Csr* A, real_type alpha) override; - int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B); override; + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) override; virtual int matvec(matrix::Sparse* A, vector_type* vec_x, diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ab3e8e15f..d7d44a0c8 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -370,6 +371,133 @@ namespace ReSolve return 0; } + /** + * @brief Calculate buffer size and sparsity pattern of alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerHip::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferHIP** pattern) + { + auto handle = workspace_->getRocsparseHandle(); + rocsparse_status roc_info = rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host); + int info = (roc_info != rocsparse_status_success); + workspace_->setRocsparseHandle(handle); + + rocsparse_mat_descr descr_a = workspace_->getScaleAddMatrixDescriptor(); + rocsparse_create_mat_descr(&descr_a); + workspace_->setScaleAddMatrixDescriptor(descr_a); + + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + real_type* c_v = nullptr; + index_type* c_i = nullptr; + index_type* c_j = nullptr; + + size_t buffer_byte_size_add{0}; + *pattern = new ScaleAddBufferHIP(n + 1, buffer_byte_size_add); + + index_type nnz_total; + // determines sum row offsets and total number of nonzeros + roc_info = rocsparse_bsrgeam_nnzb(handle, rocsparse_direction_row, m, n, 1, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total); + info = info || (roc_info != rocsparse_status_success); + (*pattern)->setNnz(nnz_total); + return 0; + } + + + /** + * @brief Given sparsity pattern, calculate alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerHip::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferHIP* pattern) + { + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + auto c_v = C->getValues(memory::DEVICE); + auto c_i = C->getRowData(memory::DEVICE); + auto c_j = C->getColData(memory::DEVICE); + + auto handle = workspace_->getRocsparseHandle(); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + assert(m == C->getNumRows()); + + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + assert(n == C->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + rocsparse_mat_descr descr_a = workspace_->getScaleAddMatrixDescriptor(); + rocsparse_status roc_info = rocsparse_dbsrgeam(handle, rocsparse_direction_row, m, n, 1, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j); + info = info || (roc_info != rocsparse_status_success); + info = info || C->setUpdated(memory::DEVICE); + return info; + } + + /** + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) + { + if (A->destroyMatrixData(memory::DEVICE) != 0) + { + return 1; + } + if (A->destroyMatrixData(memory::HOST) != 0) + { + return 1; + } + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::DEVICE, memory::DEVICE); + } + /** * @brief Add a constant to the nonzero values of a csr matrix, * then add the identity matrix. @@ -380,10 +508,41 @@ namespace ReSolve */ int MatrixHandlerHip::scaleAddI(matrix::Csr* A, real_type alpha) { - (void) A; - (void) alpha; - // NOT IMPLEMENTED - return 1; + index_type n = A->getNumRows(); + + std::vector I_i(n + 1); + std::iota(I_i.begin(), I_i.end(), 0); + + std::vector I_j(n); + std::iota(I_j.begin(), I_j.end(), 0); + + std::vector I_v(n, 1.0); + + matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); + int info = I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddBufferHIP* pattern = workspace_->getScaleAddIBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferHIP* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, alpha, &I, 1., &pattern); + workspace_->setScaleAddIBuffer(pattern); + workspace_->scaleAddISetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; } /** @@ -396,11 +555,29 @@ namespace ReSolve */ int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) { - (void) A; - (void) alpha; - (void) B; - // NOT IMPLEMENTED - return 1; + int info = 0; + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddBSetup()) + { + ScaleAddBufferHIP* pattern = workspace_->getScaleAddBBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferHIP* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, alpha, B, 1., &pattern); + workspace_->setScaleAddBBuffer(pattern); + workspace_->scaleAddBSetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 400542c79..3d6aa51c5 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -17,6 +17,7 @@ namespace ReSolve class Csc; class Csr; } // namespace matrix + class ScaleAddBufferHIP; class LinAlgWorkspaceHIP; } // namespace ReSolve @@ -60,6 +61,9 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: + int allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferHIP** pattern); + int computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferHIP* pattern); + LinAlgWorkspaceHIP* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index ea4fcff5b..3d37f62f2 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -1,5 +1,7 @@ #include +#include + namespace ReSolve { @@ -82,8 +84,8 @@ namespace ReSolve { handle_rocsparse_ = nullptr; handle_rocblas_ = nullptr; - buffer_scale_add_i = nullptr; - buffer_scale_add_b = nullptr; + buffer_scale_add_i_ = nullptr; + buffer_scale_add_b_ = nullptr; matvec_setup_done_ = false; d_r_ = nullptr; d_r_size_ = 0; @@ -101,13 +103,17 @@ namespace ReSolve { rocsparse_destroy_mat_descr(mat_A_); } - if (buffer_scale_add_i != nullptr) + if (scale_add_i_setup_done_) { - delete buffer_scale_add_i; + assert(buffer_scale_add_i_ != nullptr); + delete buffer_scale_add_i_; + rocsparse_destroy_mat_descr(mat_B_); } - if (buffer_scale_add_b != nullptr) + if (buffer_scale_add_b_ != nullptr) { - delete buffer_scale_add_b; + assert(buffer_scale_add_b_ != nullptr); + delete buffer_scale_add_b_; + rocsparse_destroy_mat_descr(mat_B_); } if (d_r_size_ != 0) { @@ -137,6 +143,19 @@ namespace ReSolve rocsparse_destroy_mat_descr(mat_A_); matvec_setup_done_ = false; } + if (scale_add_b_setup_done_) + { + delete buffer_scale_add_b_; + buffer_scale_add_b_ = nullptr; + rocsparse_destroy_mat_descr(mat_B_); + scale_add_b_setup_done_ = false; + } + if (scale_add_i_setup_done_) + { + delete buffer_scale_add_i_; + buffer_scale_add_i_ = nullptr; + scale_add_i_setup_done_ = false; + } if (d_r_size_ != 0) { mem_.deleteOnDevice(d_r_); @@ -183,11 +202,21 @@ namespace ReSolve return mat_A_; } + rocsparse_mat_descr LinAlgWorkspaceHIP::getScaleAddMatrixDescriptor() + { + return mat_B_; + } + void LinAlgWorkspaceHIP::setSpmvMatrixDescriptor(rocsparse_mat_descr mat) { mat_A_ = mat; } + void LinAlgWorkspaceHIP::setScaleAddMatrixDescriptor(rocsparse_mat_descr mat) + { + mat_B_ = mat; + } + rocsparse_mat_info LinAlgWorkspaceHIP::getSpmvMatrixInfo() { return info_A_; @@ -213,6 +242,36 @@ namespace ReSolve norm_buffer_ = nb; } + ScaleAddBufferHIP* LinAlgWorkspaceHIP::getScaleAddIBuffer() + { + return buffer_scale_add_i_; + } + + ScaleAddBufferHIP* LinAlgWorkspaceHIP::getScaleAddBBuffer() + { + return buffer_scale_add_b_; + } + + void LinAlgWorkspaceHIP::setScaleAddBBuffer(ScaleAddBufferHIP* buffer) + { + buffer_scale_add_b_ = buffer; + } + + void LinAlgWorkspaceHIP::setScaleAddIBuffer(ScaleAddBufferHIP* buffer) + { + buffer_scale_add_i_ = buffer; + } + + void LinAlgWorkspaceHIP::scaleAddBSetupDone() + { + scale_add_b_setup_done_ = true; + } + + void LinAlgWorkspaceHIP::scaleAddISetupDone() + { + scale_add_i_setup_done_ = true; + } + void LinAlgWorkspaceHIP::setNormBufferState(bool r) { norm_buffer_ready_ = r; @@ -228,6 +287,16 @@ namespace ReSolve matvec_setup_done_ = true; } + bool LinAlgWorkspaceHIP::scaleAddISetup() + { + return scale_add_i_setup_done_; + } + + bool LinAlgWorkspaceHIP::scaleAddBSetup() + { + return scale_add_b_setup_done_; + } + void LinAlgWorkspaceHIP::initializeHandles() { rocsparse_create_handle(&handle_rocsparse_); diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index aa6e906d1..5bfa2b8d8 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -83,7 +83,7 @@ namespace ReSolve // matrix descriptors rocsparse_mat_descr mat_A_; - + rocsparse_mat_descr mat_B_; // vector descriptors not needed, rocsparse uses RAW pointers. // buffers @@ -101,8 +101,8 @@ namespace ReSolve bool transpose_workspace_ready_{false}; // to track if allocated index_type d_r_size_{0}; bool norm_buffer_ready_{false}; // to track if allocated - ScaleAddBufferHIP* buffer_scale_add_i{nullptr}; - ScaleAddBufferHIP* buffer_scale_add_b{nullptr}; + ScaleAddBufferHIP* buffer_scale_add_i_{nullptr}; + ScaleAddBufferHIP* buffer_scale_add_b_{nullptr}; MemoryHandler mem_; ///< Memory handler not needed for now }; diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 99ef35a40..5f6e7dcdd 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -77,14 +77,12 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); workspace.resetLinAlgWorkspace(); -#if !defined(RESOLVE_USE_HIP) result += test.scaleAddI(100); workspace.resetLinAlgWorkspace(); result += test.scaleAddIZero(100); workspace.resetLinAlgWorkspace(); result += test.scaleAddB(100); workspace.resetLinAlgWorkspace(); -#endif std::cout << "\n"; } From a893f2a09fefe2182f5839ac906f2ecf4a4504d9 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 12:47:12 -0500 Subject: [PATCH 38/47] simplify design Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerHip.cpp | 17 ++++++--------- resolve/workspace/LinAlgWorkspaceHIP.cpp | 27 +++++++----------------- resolve/workspace/LinAlgWorkspaceHIP.hpp | 10 +++------ 3 files changed, 17 insertions(+), 37 deletions(-) diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index d7d44a0c8..6f095ce5a 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -389,10 +389,6 @@ namespace ReSolve int info = (roc_info != rocsparse_status_success); workspace_->setRocsparseHandle(handle); - rocsparse_mat_descr descr_a = workspace_->getScaleAddMatrixDescriptor(); - rocsparse_create_mat_descr(&descr_a); - workspace_->setScaleAddMatrixDescriptor(descr_a); - auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); @@ -415,9 +411,8 @@ namespace ReSolve index_type* c_i = nullptr; index_type* c_j = nullptr; - size_t buffer_byte_size_add{0}; - *pattern = new ScaleAddBufferHIP(n + 1, buffer_byte_size_add); - + *pattern = new ScaleAddBufferHIP(n + 1); + rocsparse_mat_descr descr_a = (*pattern)->getMatrixDescriptor(); index_type nnz_total; // determines sum row offsets and total number of nonzeros roc_info = rocsparse_bsrgeam_nnzb(handle, rocsparse_direction_row, m, n, 1, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total); @@ -467,11 +462,11 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); - rocsparse_mat_descr descr_a = workspace_->getScaleAddMatrixDescriptor(); + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + rocsparse_mat_descr descr_a = pattern->getMatrixDescriptor(); rocsparse_status roc_info = rocsparse_dbsrgeam(handle, rocsparse_direction_row, m, n, 1, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j); - info = info || (roc_info != rocsparse_status_success); - info = info || C->setUpdated(memory::DEVICE); + info = info || (roc_info != rocsparse_status_success); + info = info || C->setUpdated(memory::DEVICE); return info; } diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 3d37f62f2..6f3d87b6a 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -13,11 +13,11 @@ namespace ReSolve * @param[in] col_data - pointer to column data (array of integers, length: nnz) * @param[in] nnz - number of non-zeros */ - ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows, size_t bufferSize) - : numRows_(numRows), bufferSize_(bufferSize) + ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows) + : numRows_(numRows) { + rocsparse_create_mat_descr(&mat_A_); mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); - mem_.allocateBufferOnDevice(&buffer_, bufferSize_); } /** @@ -27,7 +27,7 @@ namespace ReSolve ScaleAddBufferHIP::~ScaleAddBufferHIP() { mem_.deleteOnDevice(rowData_); - mem_.deleteOnDevice(buffer_); + rocsparse_destroy_mat_descr(mat_A_); } /** @@ -45,9 +45,9 @@ namespace ReSolve * * @return precalculated row pointers */ - void* ScaleAddBufferHIP::getBuffer() + rocsparse_mat_descr ScaleAddBufferHIP::getMatrixDescriptor() { - return buffer_; + return mat_A_; } /** @@ -107,13 +107,11 @@ namespace ReSolve { assert(buffer_scale_add_i_ != nullptr); delete buffer_scale_add_i_; - rocsparse_destroy_mat_descr(mat_B_); } if (buffer_scale_add_b_ != nullptr) { assert(buffer_scale_add_b_ != nullptr); delete buffer_scale_add_b_; - rocsparse_destroy_mat_descr(mat_B_); } if (d_r_size_ != 0) { @@ -145,13 +143,14 @@ namespace ReSolve } if (scale_add_b_setup_done_) { + assert(buffer_scale_add_b_ != nullptr); delete buffer_scale_add_b_; buffer_scale_add_b_ = nullptr; - rocsparse_destroy_mat_descr(mat_B_); scale_add_b_setup_done_ = false; } if (scale_add_i_setup_done_) { + assert(buffer_scale_add_i_ != nullptr); delete buffer_scale_add_i_; buffer_scale_add_i_ = nullptr; scale_add_i_setup_done_ = false; @@ -202,21 +201,11 @@ namespace ReSolve return mat_A_; } - rocsparse_mat_descr LinAlgWorkspaceHIP::getScaleAddMatrixDescriptor() - { - return mat_B_; - } - void LinAlgWorkspaceHIP::setSpmvMatrixDescriptor(rocsparse_mat_descr mat) { mat_A_ = mat; } - void LinAlgWorkspaceHIP::setScaleAddMatrixDescriptor(rocsparse_mat_descr mat) - { - mat_B_ = mat; - } - rocsparse_mat_info LinAlgWorkspaceHIP::getSpmvMatrixInfo() { return info_A_; diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 5bfa2b8d8..0c30a3e4b 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -13,20 +13,19 @@ namespace ReSolve class ScaleAddBufferHIP { public: - ScaleAddBufferHIP(index_type numRows, size_t bufferSize); + ScaleAddBufferHIP(index_type numRows); ~ScaleAddBufferHIP(); index_type* getRowData(); - void* getBuffer(); + rocsparse_mat_descr getMatrixDescriptor(); index_type getNumRows(); void setNnz(index_type nnz); index_type getNnz(); private: index_type* rowData_; - void* buffer_; + rocsparse_mat_descr mat_A_; index_type numRows_; index_type nnz_; - size_t bufferSize_; MemoryHandler mem_; }; @@ -41,7 +40,6 @@ namespace ReSolve rocblas_handle getRocblasHandle(); rocsparse_handle getRocsparseHandle(); rocsparse_mat_descr getSpmvMatrixDescriptor(); - rocsparse_mat_descr getScaleAddMatrixDescriptor(); rocsparse_mat_info getSpmvMatrixInfo(); index_type getDrSize(); real_type* getDr(); @@ -58,7 +56,6 @@ namespace ReSolve void setRocblasHandle(rocblas_handle handle); void setRocsparseHandle(rocsparse_handle handle); void setSpmvMatrixDescriptor(rocsparse_mat_descr mat); - void setScaleAddMatrixDescriptor(rocsparse_mat_descr mat); void setSpmvMatrixInfo(rocsparse_mat_info info); @@ -83,7 +80,6 @@ namespace ReSolve // matrix descriptors rocsparse_mat_descr mat_A_; - rocsparse_mat_descr mat_B_; // vector descriptors not needed, rocsparse uses RAW pointers. // buffers From 1db9ced6bfb5dc8608993ee1229cb3da1b5edd1b Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 13:16:59 -0500 Subject: [PATCH 39/47] match changes on HIP Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 12 ++---- resolve/workspace/LinAlgWorkspaceCUDA.cpp | 51 ++++++++++++++--------- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 8 ++-- resolve/workspace/LinAlgWorkspaceHIP.cpp | 6 +-- 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index bb4d10cf8..01f33f13e 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -420,12 +420,6 @@ namespace ReSolve int info = (cu_info != CUSPARSE_STATUS_SUCCESS); workspace_->setCusparseHandle(handle); - cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); - cusparseCreateMatDescr(&descr_a); - cusparseSetMatType(descr_a, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_a, CUSPARSE_INDEX_BASE_ZERO); - workspace_->setScaleAddMatrixDescriptor(descr_a); - auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); @@ -448,12 +442,14 @@ namespace ReSolve index_type* c_i = nullptr; index_type* c_j = nullptr; + *pattern = new ScaleAddBufferCUDA(n + 1); + cusparseMatDescr_t descr_a = (*pattern)->getMatrixDescriptor(); size_t buffer_byte_size_add; // calculates sum buffer cu_info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); - *pattern = new ScaleAddBufferCUDA(n + 1, buffer_byte_size_add); + (*pattern)->allocateBuffer(buffer_byte_size_add); index_type nnz_total; // determines sum row offsets and total number of nonzeros @@ -504,7 +500,7 @@ namespace ReSolve index_type nnz_b = B->getNnz(); int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); - cusparseMatDescr_t descr_a = workspace_->getScaleAddMatrixDescriptor(); + cusparseMatDescr_t descr_a = pattern->getMatrixDescriptor(); cusparseStatus_t cu_info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, pattern->getBuffer()); info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); info = info || C->setUpdated(memory::DEVICE); diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index 8e59e9acd..ec9b5fa68 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -6,16 +6,15 @@ namespace ReSolve /** * @brief Store sparsity pattern * - * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) * @param[in] nrows - number of rows - * @param[in] col_data - pointer to column data (array of integers, length: nnz) - * @param[in] nnz - number of non-zeros */ - ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows, size_t bufferSize) - : numRows_(numRows), bufferSize_(bufferSize) + ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows) + : numRows_(numRows), bufferSize_(0) { mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); - mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + cusparseCreateMatDescr(&mat_A_); + cusparseSetMatType(mat_A_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(mat_A_, CUSPARSE_INDEX_BASE_ZERO); } /** @@ -26,6 +25,7 @@ namespace ReSolve { mem_.deleteOnDevice(rowData_); mem_.deleteOnDevice(buffer_); + cusparseDestroyMatDescr(mat_A_); } /** @@ -39,9 +39,30 @@ namespace ReSolve } /** - * @brief Retrieve row sparsity pattern + * @brief Retrieve matrix descriptor * - * @return precalculated row pointers + * @return matrix descriptor set for scaleAddB, scaleAddI + */ + cusparseMatDescr_t ScaleAddBufferCUDA::getMatrixDescriptor() + { + return mat_A_; + } + + /** + * @brief Allocate memory for cusparse buffer + * + * @param[in] bufferSize calculated array size + */ + void ScaleAddBufferCUDA::allocateBuffer(size_t bufferSize) + { + bufferSize_ = bufferSize; + mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + } + + /** + * @brief Retrieve cusparse buffer + * + * @return cusparse buffer */ void* ScaleAddBufferCUDA::getBuffer() { @@ -69,9 +90,9 @@ namespace ReSolve } /** - * @brief Get number of non-zeros. + * @brief set number of non-zeros. * - * @return number of non-zeros + * @param[in] nnz number of non-zeros */ void ScaleAddBufferCUDA::setNnz(index_type nnz) { @@ -293,21 +314,11 @@ namespace ReSolve return mat_A_; } - cusparseMatDescr_t LinAlgWorkspaceCUDA::getScaleAddMatrixDescriptor() - { - return mat_B_; - } - void LinAlgWorkspaceCUDA::setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat) { mat_A_ = mat; } - void LinAlgWorkspaceCUDA::setScaleAddMatrixDescriptor(cusparseMatDescr_t mat) - { - mat_B_ = mat; - } - cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() { return vec_x_; diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index 1bff1eaf1..b90fb865e 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -12,10 +12,12 @@ namespace ReSolve class ScaleAddBufferCUDA { public: - ScaleAddBufferCUDA(index_type numRows, size_t bufferSize); + ScaleAddBufferCUDA(index_type numRows); ~ScaleAddBufferCUDA(); index_type* getRowData(); + void allocateBuffer(size_t bufferSize); void* getBuffer(); + cusparseMatDescr_t getMatrixDescriptor(); index_type getNumRows(); void setNnz(index_type nnz); index_type getNnz(); @@ -23,6 +25,7 @@ namespace ReSolve private: index_type* rowData_; void* buffer_; + cusparseMatDescr_t mat_A_; index_type numRows_; index_type nnz_; size_t bufferSize_; @@ -56,7 +59,6 @@ namespace ReSolve cusolverSpHandle_t getCusolverSpHandle(); // needed for 1-norms etc cusparseHandle_t getCusparseHandle(); cusparseSpMatDescr_t getSpmvMatrixDescriptor(); - cusparseMatDescr_t getScaleAddMatrixDescriptor(); cusparseDnVecDescr_t getVecX(); cusparseDnVecDescr_t getVecY(); index_type getDrSize(); @@ -67,7 +69,6 @@ namespace ReSolve void setCusolverSpHandle(cusolverSpHandle_t handle); void setCusparseHandle(cusparseHandle_t handle); void setSpmvMatrixDescriptor(cusparseSpMatDescr_t mat); - void setScaleAddMatrixDescriptor(cusparseMatDescr_t mat); void setDrSize(index_type new_sz); void setDr(real_type* new_dr); void setNormBufferState(bool r); @@ -88,7 +89,6 @@ namespace ReSolve // matrix descriptors cusparseSpMatDescr_t mat_A_; - cusparseMatDescr_t mat_B_; // vector descriptors cusparseDnVecDescr_t vec_x_; diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 6f3d87b6a..71cd02237 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -41,9 +41,9 @@ namespace ReSolve } /** - * @brief Retrieve row sparsity pattern + * @brief Retrieve matrix descriptor * - * @return precalculated row pointers + * @return matrix descriptor set for scaleAddB, scaleAddI */ rocsparse_mat_descr ScaleAddBufferHIP::getMatrixDescriptor() { @@ -73,7 +73,7 @@ namespace ReSolve /** * @brief Get number of non-zeros. * - * @return number of non-zeros + * @param[in] nnz number of non-zeros */ void ScaleAddBufferHIP::setNnz(index_type nnz) { From ae6889b32471dec77235fc547468e731553fe711 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Thu, 20 Nov 2025 18:30:32 +0000 Subject: [PATCH 40/47] Apply pre-commmit fixes --- resolve/matrix/MatrixHandlerCuda.cpp | 2 +- resolve/matrix/MatrixHandlerHip.cpp | 29 +++++++++++----------- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 22 ++++++++--------- resolve/workspace/LinAlgWorkspaceHIP.cpp | 8 +++--- resolve/workspace/LinAlgWorkspaceHIP.hpp | 30 +++++++++++------------ 5 files changed, 45 insertions(+), 46 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 01f33f13e..e77875cd2 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -444,7 +444,7 @@ namespace ReSolve *pattern = new ScaleAddBufferCUDA(n + 1); cusparseMatDescr_t descr_a = (*pattern)->getMatrixDescriptor(); - size_t buffer_byte_size_add; + size_t buffer_byte_size_add; // calculates sum buffer cu_info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 6f095ce5a..b41d8e376 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -384,9 +384,9 @@ namespace ReSolve */ int MatrixHandlerHip::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferHIP** pattern) { - auto handle = workspace_->getRocsparseHandle(); + auto handle = workspace_->getRocsparseHandle(); rocsparse_status roc_info = rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host); - int info = (roc_info != rocsparse_status_success); + int info = (roc_info != rocsparse_status_success); workspace_->setRocsparseHandle(handle); auto a_v = A->getValues(memory::DEVICE); @@ -411,17 +411,16 @@ namespace ReSolve index_type* c_i = nullptr; index_type* c_j = nullptr; - *pattern = new ScaleAddBufferHIP(n + 1); + *pattern = new ScaleAddBufferHIP(n + 1); rocsparse_mat_descr descr_a = (*pattern)->getMatrixDescriptor(); - index_type nnz_total; + index_type nnz_total; // determines sum row offsets and total number of nonzeros roc_info = rocsparse_bsrgeam_nnzb(handle, rocsparse_direction_row, m, n, 1, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total); - info = info || (roc_info != rocsparse_status_success); + info = info || (roc_info != rocsparse_status_success); (*pattern)->setNnz(nnz_total); return 0; } - /** * @brief Given sparsity pattern, calculate alpha*A + beta*B * @@ -462,11 +461,11 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); - rocsparse_mat_descr descr_a = pattern->getMatrixDescriptor(); - rocsparse_status roc_info = rocsparse_dbsrgeam(handle, rocsparse_direction_row, m, n, 1, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j); - info = info || (roc_info != rocsparse_status_success); - info = info || C->setUpdated(memory::DEVICE); + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + rocsparse_mat_descr descr_a = pattern->getMatrixDescriptor(); + rocsparse_status roc_info = rocsparse_dbsrgeam(handle, rocsparse_direction_row, m, n, 1, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j); + info = info || (roc_info != rocsparse_status_success); + info = info || C->setUpdated(memory::DEVICE); return info; } @@ -520,7 +519,7 @@ namespace ReSolve if (workspace_->scaleAddISetup()) { ScaleAddBufferHIP* pattern = workspace_->getScaleAddIBuffer(); - matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); info = info || C.allocateMatrixData(memory::DEVICE); info = info || computeSum(A, alpha, &I, 1., &C, pattern); info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); @@ -528,7 +527,7 @@ namespace ReSolve else { ScaleAddBufferHIP* pattern = nullptr; - matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); info = info || allocateForSum(A, alpha, &I, 1., &pattern); workspace_->setScaleAddIBuffer(pattern); workspace_->scaleAddISetupDone(); @@ -555,7 +554,7 @@ namespace ReSolve if (workspace_->scaleAddBSetup()) { ScaleAddBufferHIP* pattern = workspace_->getScaleAddBBuffer(); - matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); info = info || C.allocateMatrixData(memory::DEVICE); info = info || computeSum(A, alpha, B, 1., &C, pattern); info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); @@ -563,7 +562,7 @@ namespace ReSolve else { ScaleAddBufferHIP* pattern = nullptr; - matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); info = info || allocateForSum(A, alpha, B, 1., &pattern); workspace_->setScaleAddBBuffer(pattern); workspace_->scaleAddBSetupDone(); diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index b90fb865e..578a8f2e9 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -14,22 +14,22 @@ namespace ReSolve public: ScaleAddBufferCUDA(index_type numRows); ~ScaleAddBufferCUDA(); - index_type* getRowData(); + index_type* getRowData(); void allocateBuffer(size_t bufferSize); - void* getBuffer(); + void* getBuffer(); cusparseMatDescr_t getMatrixDescriptor(); - index_type getNumRows(); - void setNnz(index_type nnz); - index_type getNnz(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); private: - index_type* rowData_; - void* buffer_; + index_type* rowData_; + void* buffer_; cusparseMatDescr_t mat_A_; - index_type numRows_; - index_type nnz_; - size_t bufferSize_; - MemoryHandler mem_; + index_type numRows_; + index_type nnz_; + size_t bufferSize_; + MemoryHandler mem_; }; class LinAlgWorkspaceCUDA diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 71cd02237..52b2577cf 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -1,7 +1,7 @@ -#include - #include +#include + namespace ReSolve { @@ -82,8 +82,8 @@ namespace ReSolve LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { - handle_rocsparse_ = nullptr; - handle_rocblas_ = nullptr; + handle_rocsparse_ = nullptr; + handle_rocblas_ = nullptr; buffer_scale_add_i_ = nullptr; buffer_scale_add_b_ = nullptr; matvec_setup_done_ = false; diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 0c30a3e4b..3cbb560fe 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -15,18 +15,18 @@ namespace ReSolve public: ScaleAddBufferHIP(index_type numRows); ~ScaleAddBufferHIP(); - index_type* getRowData(); + index_type* getRowData(); rocsparse_mat_descr getMatrixDescriptor(); - index_type getNumRows(); - void setNnz(index_type nnz); - index_type getNnz(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); private: - index_type* rowData_; + index_type* rowData_; rocsparse_mat_descr mat_A_; - index_type numRows_; - index_type nnz_; - MemoryHandler mem_; + index_type numRows_; + index_type nnz_; + MemoryHandler mem_; }; class LinAlgWorkspaceHIP @@ -91,15 +91,15 @@ namespace ReSolve // info - but we need info rocsparse_mat_info info_A_; - real_type* d_r_{nullptr}; // needed for inf-norm - real_type* norm_buffer_{nullptr}; // needed for inf-norm - void* transpose_workspace_{nullptr}; // needed for transpose - bool transpose_workspace_ready_{false}; // to track if allocated - index_type d_r_size_{0}; - bool norm_buffer_ready_{false}; // to track if allocated + real_type* d_r_{nullptr}; // needed for inf-norm + real_type* norm_buffer_{nullptr}; // needed for inf-norm + void* transpose_workspace_{nullptr}; // needed for transpose + bool transpose_workspace_ready_{false}; // to track if allocated + index_type d_r_size_{0}; + bool norm_buffer_ready_{false}; // to track if allocated ScaleAddBufferHIP* buffer_scale_add_i_{nullptr}; ScaleAddBufferHIP* buffer_scale_add_b_{nullptr}; - MemoryHandler mem_; ///< Memory handler not needed for now + MemoryHandler mem_; ///< Memory handler not needed for now }; } // namespace ReSolve From cc4f865ef0021c36d3268ba0c6e7ead6b8f3ec4a Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 13:38:56 -0500 Subject: [PATCH 41/47] fix warnings Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerHip.cpp | 15 ++++----------- resolve/matrix/MatrixHandlerHip.hpp | 2 +- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index b41d8e376..7e9a5cc80 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -375,25 +375,21 @@ namespace ReSolve * @brief Calculate buffer size and sparsity pattern of alpha*A + beta*B * * @param[in] A - CSR matrix - * @param[in] alpha - constant to be added to A * @param[in] B - CSR matrix - * @param[in] beta - constant to be added to B * @param[in, out] pattern - sparsity pattern and buffer * * @return int error code, 0 if successful */ - int MatrixHandlerHip::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferHIP** pattern) + int MatrixHandlerHip::allocateForSum(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferHIP** pattern) { auto handle = workspace_->getRocsparseHandle(); rocsparse_status roc_info = rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host); int info = (roc_info != rocsparse_status_success); workspace_->setRocsparseHandle(handle); - auto a_v = A->getValues(memory::DEVICE); auto a_i = A->getRowData(memory::DEVICE); auto a_j = A->getColData(memory::DEVICE); - auto b_v = B->getValues(memory::DEVICE); auto b_i = B->getRowData(memory::DEVICE); auto b_j = B->getColData(memory::DEVICE); @@ -407,10 +403,6 @@ namespace ReSolve index_type nnz_a = A->getNnz(); index_type nnz_b = B->getNnz(); - real_type* c_v = nullptr; - index_type* c_i = nullptr; - index_type* c_j = nullptr; - *pattern = new ScaleAddBufferHIP(n + 1); rocsparse_mat_descr descr_a = (*pattern)->getMatrixDescriptor(); index_type nnz_total; @@ -528,7 +520,8 @@ namespace ReSolve { ScaleAddBufferHIP* pattern = nullptr; matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - info = info || allocateForSum(A, alpha, &I, 1., &pattern); + info = info || allocateForSum(A, &I, &pattern); + workspace_->setScaleAddIBuffer(pattern); workspace_->scaleAddISetupDone(); C.setNnz(pattern->getNnz()); @@ -563,7 +556,7 @@ namespace ReSolve { ScaleAddBufferHIP* pattern = nullptr; matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); - info = info || allocateForSum(A, alpha, B, 1., &pattern); + info = info || allocateForSum(A, B, &pattern); workspace_->setScaleAddBBuffer(pattern); workspace_->scaleAddBSetupDone(); C.setNnz(pattern->getNnz()); diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 3d6aa51c5..4f7c092b3 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -61,7 +61,7 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: - int allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferHIP** pattern); + int allocateForSum(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferHIP** pattern); int computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferHIP* pattern); LinAlgWorkspaceHIP* workspace_{nullptr}; From 73935014aedf26c595b83cc0bd79237ff63624e8 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 13:48:49 -0500 Subject: [PATCH 42/47] MatrixHandler: fix return value Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCuda.cpp | 2 +- resolve/matrix/MatrixHandlerHip.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index e77875cd2..b535c2837 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -456,7 +456,7 @@ namespace ReSolve cu_info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total, (*pattern)->getBuffer()); info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); (*pattern)->setNnz(nnz_total); - return 0; + return info; } /** diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 7e9a5cc80..80e7bd7e0 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -410,7 +410,7 @@ namespace ReSolve roc_info = rocsparse_bsrgeam_nnzb(handle, rocsparse_direction_row, m, n, 1, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total); info = info || (roc_info != rocsparse_status_success); (*pattern)->setNnz(nnz_total); - return 0; + return info; } /** From 95609e8f2f855a01cdeb2eb9785f2f97e7d5465b Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 20 Nov 2025 13:52:46 -0500 Subject: [PATCH 43/47] update CHANGELOG.md Signed-off-by: Steven Hahn --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a112d7a3b..4c9e131f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ - Added cmake-format +- The MatrixHandler::scaleAddI function was added to calculate A := cA + I + +- The MatrixHandler::scaleAddB function was added to calculate A := cA + B + ## Changes to Re::Solve in release 0.99.2 ### Major Features From 1c3da0c2d61d3dc5558eb9188d8a3723784ea4b5 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 24 Nov 2025 15:51:37 -0500 Subject: [PATCH 44/47] Move ScaleAddBuffer to a new file Signed-off-by: Steven Hahn --- resolve/matrix/MatrixHandlerCpu.cpp | 1 + resolve/matrix/MatrixHandlerCuda.cpp | 1 + resolve/matrix/MatrixHandlerHip.cpp | 1 + resolve/workspace/CMakeLists.txt | 10 +-- resolve/workspace/LinAlgWorkspaceCUDA.cpp | 98 +-------------------- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 23 +---- resolve/workspace/LinAlgWorkspaceCpu.cpp | 63 +------------ resolve/workspace/LinAlgWorkspaceCpu.hpp | 17 +--- resolve/workspace/LinAlgWorkspaceHIP.cpp | 76 ---------------- resolve/workspace/LinAlgWorkspaceHIP.hpp | 19 ---- resolve/workspace/ScaleAddBufferCUDA.cpp | 102 ++++++++++++++++++++++ resolve/workspace/ScaleAddBufferCUDA.hpp | 33 +++++++ resolve/workspace/ScaleAddBufferCpu.cpp | 69 +++++++++++++++ resolve/workspace/ScaleAddBufferCpu.hpp | 23 +++++ resolve/workspace/ScaleAddBufferHIP.cpp | 0 resolve/workspace/ScaleAddBufferHIP.hpp | 30 +++++++ 16 files changed, 270 insertions(+), 296 deletions(-) create mode 100644 resolve/workspace/ScaleAddBufferCUDA.cpp create mode 100644 resolve/workspace/ScaleAddBufferCUDA.hpp create mode 100644 resolve/workspace/ScaleAddBufferCpu.cpp create mode 100644 resolve/workspace/ScaleAddBufferCpu.hpp create mode 100644 resolve/workspace/ScaleAddBufferHIP.cpp create mode 100644 resolve/workspace/ScaleAddBufferHIP.hpp diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 499a8fe3c..f8b5331b5 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace ReSolve { diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index b535c2837..25f013c6e 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace ReSolve { diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 80e7bd7e0..20a28e3a1 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace ReSolve { diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index d9849e6cc..10283165e 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -7,16 +7,16 @@ ]] # C++ code -set(ReSolve_Workspace_SRC LinAlgWorkspaceCpu.cpp) +set(ReSolve_Workspace_SRC ScaleAddBufferCpu.cpp LinAlgWorkspaceCpu.cpp) # C++ code that depends on CUDA SDK libraries -set(ReSolve_Workspace_CUDASDK_SRC LinAlgWorkspaceCUDA.cpp) +set(ReSolve_Workspace_CUDASDK_SRC ScaleAddBufferCUDA.cpp LinAlgWorkspaceCUDA.cpp) -set(ReSolve_Workspace_ROCM_SRC LinAlgWorkspaceHIP.cpp) +set(ReSolve_Workspace_ROCM_SRC ScaleAddBufferHIP.cpp LinAlgWorkspaceHIP.cpp) set(ReSolve_Workspace_HEADER_INSTALL - LinAlgWorkspace.hpp LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp - LinAlgWorkspaceHIP.hpp + ScaleAddBufferCpu.hpp ScaleAddBufferCUDA.upp, ScaleAddBufferHIP.hpp LinAlgWorkspace.hpp + LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp LinAlgWorkspaceHIP.hpp ) add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index ec9b5fa68..579876665 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -1,104 +1,8 @@ #include +#include namespace ReSolve { - - /** - * @brief Store sparsity pattern - * - * @param[in] nrows - number of rows - */ - ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows) - : numRows_(numRows), bufferSize_(0) - { - mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); - cusparseCreateMatDescr(&mat_A_); - cusparseSetMatType(mat_A_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(mat_A_, CUSPARSE_INDEX_BASE_ZERO); - } - - /** - * @brief Destructor - * - */ - ScaleAddBufferCUDA::~ScaleAddBufferCUDA() - { - mem_.deleteOnDevice(rowData_); - mem_.deleteOnDevice(buffer_); - cusparseDestroyMatDescr(mat_A_); - } - - /** - * @brief Retrieve row sparsity pattern - * - * @return precalculated row pointers - */ - index_type* ScaleAddBufferCUDA::getRowData() - { - return rowData_; - } - - /** - * @brief Retrieve matrix descriptor - * - * @return matrix descriptor set for scaleAddB, scaleAddI - */ - cusparseMatDescr_t ScaleAddBufferCUDA::getMatrixDescriptor() - { - return mat_A_; - } - - /** - * @brief Allocate memory for cusparse buffer - * - * @param[in] bufferSize calculated array size - */ - void ScaleAddBufferCUDA::allocateBuffer(size_t bufferSize) - { - bufferSize_ = bufferSize; - mem_.allocateBufferOnDevice(&buffer_, bufferSize_); - } - - /** - * @brief Retrieve cusparse buffer - * - * @return cusparse buffer - */ - void* ScaleAddBufferCUDA::getBuffer() - { - return buffer_; - } - - /** - * @brief get number of matrix rows - * - * @return number of matrix rows. - */ - index_type ScaleAddBufferCUDA::getNumRows() - { - return numRows_; - } - - /** - * @brief Get number of non-zeros. - * - * @return number of non-zeros - */ - index_type ScaleAddBufferCUDA::getNnz() - { - return nnz_; - } - - /** - * @brief set number of non-zeros. - * - * @param[in] nnz number of non-zeros - */ - void ScaleAddBufferCUDA::setNnz(index_type nnz) - { - nnz_ = nnz; - } - LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() { handle_cusolversp_ = nullptr; diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index 578a8f2e9..01b30f6e4 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -9,28 +9,7 @@ namespace ReSolve { - class ScaleAddBufferCUDA - { - public: - ScaleAddBufferCUDA(index_type numRows); - ~ScaleAddBufferCUDA(); - index_type* getRowData(); - void allocateBuffer(size_t bufferSize); - void* getBuffer(); - cusparseMatDescr_t getMatrixDescriptor(); - index_type getNumRows(); - void setNnz(index_type nnz); - index_type getNnz(); - - private: - index_type* rowData_; - void* buffer_; - cusparseMatDescr_t mat_A_; - index_type numRows_; - index_type nnz_; - size_t bufferSize_; - MemoryHandler mem_; - }; + class ScaleAddBufferCUDA; class LinAlgWorkspaceCUDA { diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index fede4a1cd..e632afa94 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -4,69 +4,10 @@ #include #include +#include + namespace ReSolve { - /** - * @brief Store sparsity pattern - * - * @param[in] row_data - row data (array of integers, length:nrows+1) - * @param[in] col_data - column data (array of integers, length: nnz) - */ - ScaleAddBufferCpu::ScaleAddBufferCpu(std::vector rowData, std::vector colData) - : rowData_(std::move(rowData)), colData_(std::move(colData)) - { - } - - /** - * @brief Retrieve row sparsity pattern - * - * @return precalculated row pointers - */ - index_type* ScaleAddBufferCpu::getRowData() - { - return rowData_.data(); - } - - /** - * @brief Retrieve column sparsity pattern - * - * @return precalculated column indices - */ - index_type* ScaleAddBufferCpu::getColumnData() - { - return colData_.data(); - } - - /** - * @brief get number of matrix rows - * - * @return number of matrix rows. - */ - index_type ScaleAddBufferCpu::getNumRows() - { - return static_cast(rowData_.size()) - 1; - } - - /** - * @brief get number of matrix columns - * - * @return number of matrix columns. - */ - index_type ScaleAddBufferCpu::getNumColumns() - { - return getNumRows(); - } - - /** - * @brief Get number of non-zeros. - * - * @return number of non-zeros - */ - index_type ScaleAddBufferCpu::getNnz() - { - return static_cast(colData_.size()); - } - LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() { } diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index edd7d3374..7c0b758c7 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -1,25 +1,10 @@ #pragma once -#include - #include namespace ReSolve { - class ScaleAddBufferCpu - { - public: - ScaleAddBufferCpu(std::vector rowData, std::vector colData); - index_type* getRowData(); - index_type* getColumnData(); - index_type getNumRows(); - index_type getNumColumns(); - index_type getNnz(); - - private: - std::vector rowData_; - std::vector colData_; - }; + class ScaleAddBufferCpu; class LinAlgWorkspaceCpu { diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 52b2577cf..65da8c175 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -4,82 +4,6 @@ namespace ReSolve { - - /** - * @brief Store sparsity pattern - * - * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) - * @param[in] nrows - number of rows - * @param[in] col_data - pointer to column data (array of integers, length: nnz) - * @param[in] nnz - number of non-zeros - */ - ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows) - : numRows_(numRows) - { - rocsparse_create_mat_descr(&mat_A_); - mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); - } - - /** - * @brief Destructor - * - */ - ScaleAddBufferHIP::~ScaleAddBufferHIP() - { - mem_.deleteOnDevice(rowData_); - rocsparse_destroy_mat_descr(mat_A_); - } - - /** - * @brief Retrieve row sparsity pattern - * - * @return precalculated row pointers - */ - index_type* ScaleAddBufferHIP::getRowData() - { - return rowData_; - } - - /** - * @brief Retrieve matrix descriptor - * - * @return matrix descriptor set for scaleAddB, scaleAddI - */ - rocsparse_mat_descr ScaleAddBufferHIP::getMatrixDescriptor() - { - return mat_A_; - } - - /** - * @brief get number of matrix rows - * - * @return number of matrix rows. - */ - index_type ScaleAddBufferHIP::getNumRows() - { - return numRows_; - } - - /** - * @brief Get number of non-zeros. - * - * @return number of non-zeros - */ - index_type ScaleAddBufferHIP::getNnz() - { - return nnz_; - } - - /** - * @brief Get number of non-zeros. - * - * @param[in] nnz number of non-zeros - */ - void ScaleAddBufferHIP::setNnz(index_type nnz) - { - nnz_ = nnz; - } - LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { handle_rocsparse_ = nullptr; diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 3cbb560fe..abc8c297d 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -10,25 +10,6 @@ namespace ReSolve { - class ScaleAddBufferHIP - { - public: - ScaleAddBufferHIP(index_type numRows); - ~ScaleAddBufferHIP(); - index_type* getRowData(); - rocsparse_mat_descr getMatrixDescriptor(); - index_type getNumRows(); - void setNnz(index_type nnz); - index_type getNnz(); - - private: - index_type* rowData_; - rocsparse_mat_descr mat_A_; - index_type numRows_; - index_type nnz_; - MemoryHandler mem_; - }; - class LinAlgWorkspaceHIP { public: diff --git a/resolve/workspace/ScaleAddBufferCUDA.cpp b/resolve/workspace/ScaleAddBufferCUDA.cpp new file mode 100644 index 000000000..76105b66c --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCUDA.cpp @@ -0,0 +1,102 @@ +#include + +namespace ReSolve +{ + + /** + * @brief Store sparsity pattern + * + * @param[in] nrows - number of rows + */ + ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows) + : numRows_(numRows), bufferSize_(0) + { + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + cusparseCreateMatDescr(&mat_A_); + cusparseSetMatType(mat_A_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(mat_A_, CUSPARSE_INDEX_BASE_ZERO); + } + + /** + * @brief Destructor + * + */ + ScaleAddBufferCUDA::~ScaleAddBufferCUDA() + { + mem_.deleteOnDevice(rowData_); + mem_.deleteOnDevice(buffer_); + cusparseDestroyMatDescr(mat_A_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferCUDA::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve matrix descriptor + * + * @return matrix descriptor set for scaleAddB, scaleAddI + */ + cusparseMatDescr_t ScaleAddBufferCUDA::getMatrixDescriptor() + { + return mat_A_; + } + + /** + * @brief Allocate memory for cusparse buffer + * + * @param[in] bufferSize calculated array size + */ + void ScaleAddBufferCUDA::allocateBuffer(size_t bufferSize) + { + bufferSize_ = bufferSize; + mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + } + + /** + * @brief Retrieve cusparse buffer + * + * @return cusparse buffer + */ + void* ScaleAddBufferCUDA::getBuffer() + { + return buffer_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferCUDA::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferCUDA::getNnz() + { + return nnz_; + } + + /** + * @brief set number of non-zeros. + * + * @param[in] nnz number of non-zeros + */ + void ScaleAddBufferCUDA::setNnz(index_type nnz) + { + nnz_ = nnz; + } + +} // namespace ReSolve \ No newline at end of file diff --git a/resolve/workspace/ScaleAddBufferCUDA.hpp b/resolve/workspace/ScaleAddBufferCUDA.hpp new file mode 100644 index 000000000..9636c9330 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCUDA.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include "cusparse.h" +#include +#include + +namespace ReSolve +{ + + class ScaleAddBufferCUDA + { + public: + ScaleAddBufferCUDA(index_type numRows); + ~ScaleAddBufferCUDA(); + index_type* getRowData(); + void allocateBuffer(size_t bufferSize); + void* getBuffer(); + cusparseMatDescr_t getMatrixDescriptor(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); + + private: + index_type* rowData_; + void* buffer_; + cusparseMatDescr_t mat_A_; + index_type numRows_; + index_type nnz_; + size_t bufferSize_; + MemoryHandler mem_; + }; + +} // namespace ReSolve \ No newline at end of file diff --git a/resolve/workspace/ScaleAddBufferCpu.cpp b/resolve/workspace/ScaleAddBufferCpu.cpp new file mode 100644 index 000000000..4f9066e89 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCpu.cpp @@ -0,0 +1,69 @@ +#include +#include +#include + +#include + +namespace ReSolve +{ + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - row data (array of integers, length:nrows+1) + * @param[in] col_data - column data (array of integers, length: nnz) + */ + ScaleAddBufferCpu::ScaleAddBufferCpu(std::vector rowData, std::vector colData) + : rowData_(std::move(rowData)), colData_(std::move(colData)) + { + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferCpu::getRowData() + { + return rowData_.data(); + } + + /** + * @brief Retrieve column sparsity pattern + * + * @return precalculated column indices + */ + index_type* ScaleAddBufferCpu::getColumnData() + { + return colData_.data(); + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferCpu::getNumRows() + { + return static_cast(rowData_.size()) - 1; + } + + /** + * @brief get number of matrix columns + * + * @return number of matrix columns. + */ + index_type ScaleAddBufferCpu::getNumColumns() + { + return getNumRows(); + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferCpu::getNnz() + { + return static_cast(colData_.size()); + } +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCpu.hpp b/resolve/workspace/ScaleAddBufferCpu.hpp new file mode 100644 index 000000000..61fd014a8 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCpu.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include + +namespace ReSolve +{ + class ScaleAddBufferCpu + { + public: + ScaleAddBufferCpu(std::vector rowData, std::vector colData); + index_type* getRowData(); + index_type* getColumnData(); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); + + private: + std::vector rowData_; + std::vector colData_; + }; +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferHIP.cpp b/resolve/workspace/ScaleAddBufferHIP.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/resolve/workspace/ScaleAddBufferHIP.hpp b/resolve/workspace/ScaleAddBufferHIP.hpp new file mode 100644 index 000000000..4fb58efd5 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferHIP.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace ReSolve +{ + + class ScaleAddBufferHIP + { + public: + ScaleAddBufferHIP(index_type numRows); + ~ScaleAddBufferHIP(); + index_type* getRowData(); + rocsparse_mat_descr getMatrixDescriptor(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); + + private: + index_type* rowData_; + rocsparse_mat_descr mat_A_; + index_type numRows_; + index_type nnz_; + MemoryHandler mem_; + }; From 2e1e760e4f94b3530d19e8c0a897929ef6259ddf Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Mon, 24 Nov 2025 20:52:14 +0000 Subject: [PATCH 45/47] Apply pre-commmit fixes --- resolve/workspace/CMakeLists.txt | 13 ++++++++++--- resolve/workspace/ScaleAddBufferCUDA.cpp | 2 +- resolve/workspace/ScaleAddBufferCUDA.hpp | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index 10283165e..a0760396f 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -10,13 +10,20 @@ set(ReSolve_Workspace_SRC ScaleAddBufferCpu.cpp LinAlgWorkspaceCpu.cpp) # C++ code that depends on CUDA SDK libraries -set(ReSolve_Workspace_CUDASDK_SRC ScaleAddBufferCUDA.cpp LinAlgWorkspaceCUDA.cpp) +set(ReSolve_Workspace_CUDASDK_SRC ScaleAddBufferCUDA.cpp + LinAlgWorkspaceCUDA.cpp +) set(ReSolve_Workspace_ROCM_SRC ScaleAddBufferHIP.cpp LinAlgWorkspaceHIP.cpp) set(ReSolve_Workspace_HEADER_INSTALL - ScaleAddBufferCpu.hpp ScaleAddBufferCUDA.upp, ScaleAddBufferHIP.hpp LinAlgWorkspace.hpp - LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp LinAlgWorkspaceHIP.hpp + ScaleAddBufferCpu.hpp + ScaleAddBufferCUDA.upp, + ScaleAddBufferHIP.hpp + LinAlgWorkspace.hpp + LinAlgWorkspaceCpu.hpp + LinAlgWorkspaceCUDA.hpp + LinAlgWorkspaceHIP.hpp ) add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) diff --git a/resolve/workspace/ScaleAddBufferCUDA.cpp b/resolve/workspace/ScaleAddBufferCUDA.cpp index 76105b66c..7bbd33f44 100644 --- a/resolve/workspace/ScaleAddBufferCUDA.cpp +++ b/resolve/workspace/ScaleAddBufferCUDA.cpp @@ -99,4 +99,4 @@ namespace ReSolve nnz_ = nnz; } -} // namespace ReSolve \ No newline at end of file +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCUDA.hpp b/resolve/workspace/ScaleAddBufferCUDA.hpp index 9636c9330..683eb3894 100644 --- a/resolve/workspace/ScaleAddBufferCUDA.hpp +++ b/resolve/workspace/ScaleAddBufferCUDA.hpp @@ -30,4 +30,4 @@ namespace ReSolve MemoryHandler mem_; }; -} // namespace ReSolve \ No newline at end of file +} // namespace ReSolve From cce262447da78ab0f6de25003b1725b5b8e18b57 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 24 Nov 2025 16:06:45 -0500 Subject: [PATCH 46/47] fix HIP build Signed-off-by: Steven Hahn --- resolve/workspace/LinAlgWorkspaceHIP.cpp | 1 + resolve/workspace/LinAlgWorkspaceHIP.hpp | 1 + resolve/workspace/ScaleAddBufferHIP.cpp | 81 ++++++++++++++++++++++++ resolve/workspace/ScaleAddBufferHIP.hpp | 2 + 4 files changed, 85 insertions(+) diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 65da8c175..1504a42c1 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -1,6 +1,7 @@ #include #include +#include namespace ReSolve { diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index abc8c297d..fe08d9619 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -9,6 +9,7 @@ namespace ReSolve { + class ScaleAddBufferHIP; class LinAlgWorkspaceHIP { diff --git a/resolve/workspace/ScaleAddBufferHIP.cpp b/resolve/workspace/ScaleAddBufferHIP.cpp index e69de29bb..2bce3e114 100644 --- a/resolve/workspace/ScaleAddBufferHIP.cpp +++ b/resolve/workspace/ScaleAddBufferHIP.cpp @@ -0,0 +1,81 @@ +#include + +#include + +namespace ReSolve +{ + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) + * @param[in] nrows - number of rows + * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] nnz - number of non-zeros + */ + ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows) + : numRows_(numRows) + { + rocsparse_create_mat_descr(&mat_A_); + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + } + + /** + * @brief Destructor + * + */ + ScaleAddBufferHIP::~ScaleAddBufferHIP() + { + mem_.deleteOnDevice(rowData_); + rocsparse_destroy_mat_descr(mat_A_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferHIP::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve matrix descriptor + * + * @return matrix descriptor set for scaleAddB, scaleAddI + */ + rocsparse_mat_descr ScaleAddBufferHIP::getMatrixDescriptor() + { + return mat_A_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferHIP::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferHIP::getNnz() + { + return nnz_; + } + + /** + * @brief Get number of non-zeros. + * + * @param[in] nnz number of non-zeros + */ + void ScaleAddBufferHIP::setNnz(index_type nnz) + { + nnz_ = nnz; + } +} diff --git a/resolve/workspace/ScaleAddBufferHIP.hpp b/resolve/workspace/ScaleAddBufferHIP.hpp index 4fb58efd5..836c9694d 100644 --- a/resolve/workspace/ScaleAddBufferHIP.hpp +++ b/resolve/workspace/ScaleAddBufferHIP.hpp @@ -28,3 +28,5 @@ namespace ReSolve index_type nnz_; MemoryHandler mem_; }; +} + From 6801bfd08e5f144566f6558bf94c7a829120bef0 Mon Sep 17 00:00:00 2001 From: quantumsteve Date: Mon, 24 Nov 2025 21:07:18 +0000 Subject: [PATCH 47/47] Apply pre-commmit fixes --- resolve/workspace/ScaleAddBufferHIP.cpp | 6 +++--- resolve/workspace/ScaleAddBufferHIP.hpp | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/resolve/workspace/ScaleAddBufferHIP.cpp b/resolve/workspace/ScaleAddBufferHIP.cpp index 2bce3e114..38e19a22d 100644 --- a/resolve/workspace/ScaleAddBufferHIP.cpp +++ b/resolve/workspace/ScaleAddBufferHIP.cpp @@ -1,7 +1,7 @@ -#include - #include +#include + namespace ReSolve { /** @@ -78,4 +78,4 @@ namespace ReSolve { nnz_ = nnz; } -} +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferHIP.hpp b/resolve/workspace/ScaleAddBufferHIP.hpp index 836c9694d..29299c00a 100644 --- a/resolve/workspace/ScaleAddBufferHIP.hpp +++ b/resolve/workspace/ScaleAddBufferHIP.hpp @@ -28,5 +28,4 @@ namespace ReSolve index_type nnz_; MemoryHandler mem_; }; -} - +} // namespace ReSolve