diff --git a/CHANGELOG.md b/CHANGELOG.md index a112d7a3..4c9e131f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ - Added cmake-format +- The MatrixHandler::scaleAddI function was added to calculate A := cA + I + +- The MatrixHandler::scaleAddB function was added to calculate A := cA + B + ## Changes to Re::Solve in release 0.99.2 ### Major Features diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 4f0ebc50..4f445db6 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -1,5 +1,7 @@ #include "LinSolverDirectRocSolverRf.hpp" +#include + + #include #include diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 702d80ea..ff0e9055 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -38,7 +38,7 @@ namespace ReSolve * * @brief Provides basic memory allocation, free and copy functions. * - * This class provedes abstractions for memory management functiosn for + * This class provides abstractions for memory management functions for * different GPU programming models. * * @tparam Policy - Memory management policy (vendor specific) diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 1d570af8..86871509 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -351,6 +351,57 @@ namespace ReSolve } } + /** + * @brief Multiply csr matrix by a constant and add I.
+ * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - scalar parameter + * @param[in] memspace - Device where the operation is computed + * @return 0 if successful, 1 otherwise + */ + int MatrixHandler::scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace) + { + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix is assumed to be in CSR format.\n"); + assert((A->getValues(memspace) != nullptr || A->getNnz() == 0) && "Matrix values are null!\n"); + assert(A->getNumRows() == A->getNumColumns() && "Matrix must be square.\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->scaleAddI(A, alpha); + break; + case DEVICE: + return devImpl_->scaleAddI(A, alpha); + break; + } + return 1; + } + + /** + * @brief Multiply csr matrix by a constant and add B. + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - scalar parameter + * @param[in] B - Sparse CSR matrix + * @param[in] memspace - Device where the operation is computed + * @return 0 if successful, 1 otherwise + */ + int MatrixHandler::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B, memory::MemorySpace memspace) + { + assert(A->getSparseFormat() == matrix::Sparse::COMPRESSED_SPARSE_ROW && "Matrix is assumed to be in CSR format.\n"); + assert(A->getValues(memspace) != nullptr && "Matrix values are null!\n"); + assert(A->getNumRows() == A->getNumColumns() && "Matrix must be square.\n"); + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return cpuImpl_->scaleAddB(A, alpha, B); + break; + case DEVICE: + return devImpl_->scaleAddB(A, alpha, B); + break; + } + return 1; + } + /** * @brief If CUDA support is enabled in the handler. 
* diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 6b6785ab..cd7fdcc8 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -62,6 +62,10 @@ namespace ReSolve void addConst(matrix::Sparse* A, real_type alpha, memory::MemorySpace memspace); + int scaleAddI(matrix::Csr* A, real_type alpha, memory::MemorySpace memspace); + + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B, memory::MemorySpace memspace); + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped int matvec(matrix::Sparse* A, vector_type* vec_x, diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index eecd48fb..f8b5331b 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace ReSolve { @@ -389,4 +391,368 @@ namespace ReSolve } return 0; } + + /** + * @brief Multiply all nonzero values of a sparse matrix by a constant + * + * @param[in,out] A - Sparse matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + static int scaleConst(matrix::Sparse* A, real_type alpha) + { + real_type* values = A->getValues(memory::HOST); + const index_type nnz = A->getNnz(); + for (index_type i = 0; i < nnz; ++i) + { + values[i] *= alpha; + } + return 0; + } + + /** + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) + { + if 
(A->destroyMatrixData(memory::HOST) != 0) + { + return 1; + } + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::HOST, memory::HOST); + } + + /** + * @brief Add the identity to a CSR matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] pattern - precalculated sparsity pattern + * @return 0 if successful, 1 otherwise + */ + static int addIWithPattern(matrix::Csr* A, ScaleAddBufferCpu* pattern) + { + std::vector newValues(pattern->getNnz()); + + index_type const* const originalRowPointers = A->getRowData(memory::HOST); + index_type const* const originalColumnIndices = A->getColData(memory::HOST); + real_type const* const originalValues = A->getValues(memory::HOST); + + index_type newNnzCount = 0; + for (index_type i = 0; i < A->getNumRows(); ++i) + { + const index_type originalRowStart = originalRowPointers[i]; + const index_type originalRowEnd = originalRowPointers[i + 1]; + + bool diagonalAdded = false; + for (index_type j = originalRowStart; j < originalRowEnd; ++j) + { + if (originalColumnIndices[j] == i) + { + // Diagonal element found in original matrix + newValues[newNnzCount] = originalValues[j] + 1.0; + newNnzCount++; + diagonalAdded = true; + } + else if (originalColumnIndices[j] > i && !diagonalAdded) + { + // Insert diagonal if not found yet + newValues[newNnzCount] = 1.; + newNnzCount++; + diagonalAdded = true; // Mark as added to prevent re-insertion + // Then add the current original element + newValues[newNnzCount] = originalValues[j]; + newNnzCount++; + } + else + { + // Elements before diagonal, elements after diagonal and the + // diagonal is already handled + newValues[newNnzCount] = originalValues[j]; + newNnzCount++; + } + } + + // If diagonal element was not present in original row + if (!diagonalAdded) + { + newValues[newNnzCount] = 1.; + newNnzCount++; + } + } + + assert(newNnzCount == pattern->getNnz()); + index_type info = updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), 
pattern->getNnz()); + return info; + } + + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddI(matrix::Csr* A, real_type alpha) + { + index_type info = scaleConst(A, alpha); + if (info == 1) + { + return info; + } + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddBufferCpu* pattern = workspace_->getScaleAddIBuffer(); + return addIWithPattern(A, pattern); + } + + std::vector newRowPointers(A->getNumRows() + 1); + // At most we add one element per row/column + index_type maxNnzCount = A->getNnz() + A->getNumRows(); + std::vector newColumnIndices(maxNnzCount); + std::vector newValues(maxNnzCount); + + index_type const* const originalRowPointers = A->getRowData(memory::HOST); + index_type const* const originalColIndices = A->getColData(memory::HOST); + real_type const* const originalValues = A->getValues(memory::HOST); + + index_type newNnzCount = 0; + for (index_type i = 0; i < A->getNumRows(); ++i) + { + newRowPointers[i] = newNnzCount; + const index_type originalRowStart = originalRowPointers[i]; + const index_type originalRowEnd = originalRowPointers[i + 1]; + + bool diagonalAdded = false; + for (index_type j = originalRowStart; j < originalRowEnd; ++j) + { + if (originalColIndices[j] == i) + { + // Diagonal element found in original matrix + newValues[newNnzCount] = originalValues[j] + 1.0; + newColumnIndices[newNnzCount] = i; + newNnzCount++; + diagonalAdded = true; + } + else if (originalColIndices[j] > i && !diagonalAdded) + { + // Insert diagonal if not found yet + newValues[newNnzCount] = 1.; + newColumnIndices[newNnzCount] = i; + newNnzCount++; + diagonalAdded = true; // Mark as added to prevent re-insertion + // Then add the current original element + newValues[newNnzCount] = 
originalValues[j]; + newColumnIndices[newNnzCount] = originalColIndices[j]; + newNnzCount++; + } + else + { + // Elements before diagonal, elements after diagonal and the + // diagonal is already handled + newValues[newNnzCount] = originalValues[j]; + newColumnIndices[newNnzCount] = originalColIndices[j]; + newNnzCount++; + } + } + + // If diagonal element was not present in original row + if (!diagonalAdded) + { + newValues[newNnzCount] = 1.; + newColumnIndices[newNnzCount] = i; + newNnzCount++; + } + } + newRowPointers[A->getNumRows()] = newNnzCount; + assert(newNnzCount <= maxNnzCount); + newColumnIndices.resize(newNnzCount); + auto pattern = new ScaleAddBufferCpu(std::move(newRowPointers), std::move(newColumnIndices)); + // workspace_ owns pattern + workspace_->setScaleAddIBuffer(pattern); + updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); + return 0; + } + + /** + * @brief Add the identity to a CSR matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in,out] B - Sparse CSR matrix + * @param[in] pattern - precalculated sparsity pattern + * @return 0 if successful, 1 otherwise + */ + static int addBWithPattern(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferCpu* pattern) + { + index_type const* const aRowPointers = A->getRowData(memory::HOST); + index_type const* const aColIndices = A->getColData(memory::HOST); + real_type const* const aValues = A->getValues(memory::HOST); + + index_type const* const bRowPointers = B->getRowData(memory::HOST); + index_type const* const bColIndices = B->getColData(memory::HOST); + real_type const* const bValues = B->getValues(memory::HOST); + + std::vector newValues; + newValues.reserve(pattern->getNnz()); + + for (index_type i = 0; i < A->getNumRows(); ++i) + { + const index_type startA = aRowPointers[i]; + const index_type endA = aRowPointers[i + 1]; + const index_type startB = bRowPointers[i]; + const index_type endB = bRowPointers[i + 1]; + + index_type ptrA = 
startA; + index_type ptrB = startB; + + // Merge the non-zero elements for the current row + while (ptrA < endA || ptrB < endB) + { + // Case 1: All remaining elements are in A + if (ptrB >= endB) + { + newValues.push_back(aValues[ptrA]); + ptrA++; + } + // Case 2: All remaining elements are in B + else if (ptrA >= endA) + { + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 3: Elements in both A and B. Compare column indices. + else + { + if (aColIndices[ptrA] < bColIndices[ptrB]) + { + newValues.push_back(aValues[ptrA]); + ptrA++; + } + else if (bColIndices[ptrB] < aColIndices[ptrA]) + { + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 4: Same column index, add values + else + { + real_type sum = aValues[ptrA] + bValues[ptrB]; + newValues.push_back(sum); + ptrA++; + ptrB++; + } + } + } + } + assert(static_cast(newValues.size()) == pattern->getNnz()); + return updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); + } + + /** + * @brief Multiply csr matrix by a constant and add B. 
+ * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCpu::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) + { + index_type info = scaleConst(A, alpha); + if (info == 1) + { + return info; + } + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddBSetup()) + { + ScaleAddBufferCpu* pattern = workspace_->getScaleAddBBuffer(); + return addBWithPattern(A, B, pattern); + } + + index_type const* const aRowPointers = A->getRowData(memory::HOST); + index_type const* const aColIndices = A->getColData(memory::HOST); + real_type const* const aValues = A->getValues(memory::HOST); + + index_type const* const bRowPointers = B->getRowData(memory::HOST); + index_type const* const bColIndices = B->getColData(memory::HOST); + real_type const* const bValues = B->getValues(memory::HOST); + + std::vector newRowPointers; + newRowPointers.reserve(A->getNumRows() + 1); + std::vector newColIndices; + std::vector newValues; + + newRowPointers.push_back(0); // First element is always 0 + for (index_type i = 0; i < A->getNumRows(); ++i) + { + const index_type startA = aRowPointers[i]; + const index_type endA = aRowPointers[i + 1]; + const index_type startB = bRowPointers[i]; + const index_type endB = bRowPointers[i + 1]; + + index_type ptrA = startA; + index_type ptrB = startB; + + // Merge the non-zero elements for the current row + while (ptrA < endA || ptrB < endB) + { + // Case 1: All remaining elements are in A + if (ptrB >= endB) + { + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(aValues[ptrA]); + ptrA++; + } + // Case 2: All remaining elements are in B + else if (ptrA >= endA) + { + newColIndices.push_back(bColIndices[ptrB]); + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 3: Elements in both A and B. Compare column indices. 
+ else + { + if (aColIndices[ptrA] < bColIndices[ptrB]) + { + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(aValues[ptrA]); + ptrA++; + } + else if (bColIndices[ptrB] < aColIndices[ptrA]) + { + newColIndices.push_back(bColIndices[ptrB]); + newValues.push_back(bValues[ptrB]); + ptrB++; + } + // Case 4: Same column index, add values + else + { + real_type sum = aValues[ptrA] + bValues[ptrB]; + newColIndices.push_back(aColIndices[ptrA]); + newValues.push_back(sum); + ptrA++; + ptrB++; + } + } + } + newRowPointers.push_back(static_cast(newValues.size())); + } + + auto pattern = new ScaleAddBufferCpu(std::move(newRowPointers), std::move(newColIndices)); + // workspace_ owns pattern + workspace_->setScaleAddBBuffer(pattern); + return updateMatrix(A, pattern->getRowData(), pattern->getColumnData(), newValues.data(), pattern->getNnz()); + } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 66b81078..6fcb8fc3 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -46,6 +46,10 @@ namespace ReSolve int addConst(matrix::Sparse* A, real_type alpha) override; + int scaleAddI(matrix::Csr* A, real_type alpha) override; + + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 7b310843..25f013c6 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -1,6 +1,7 @@ #include "MatrixHandlerCuda.hpp" #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include namespace ReSolve { @@ -400,4 +402,215 @@ namespace ReSolve cuda::addConst(nnz, alpha, values); return 0; } + + /** + * @brief Calculate buffer size and sparsity pattern of alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] 
alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerCuda::allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern) + { + auto handle = workspace_->getCusparseHandle(); + cusparseStatus_t cu_info = cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST); + int info = (cu_info != CUSPARSE_STATUS_SUCCESS); + workspace_->setCusparseHandle(handle); + + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + real_type* c_v = nullptr; + index_type* c_i = nullptr; + index_type* c_j = nullptr; + + *pattern = new ScaleAddBufferCUDA(n + 1); + cusparseMatDescr_t descr_a = (*pattern)->getMatrixDescriptor(); + size_t buffer_byte_size_add; + // calculates sum buffer + cu_info = cusparseDcsrgeam2_bufferSizeExt(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, &buffer_byte_size_add); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); + + (*pattern)->allocateBuffer(buffer_byte_size_add); + + index_type nnz_total; + // determines sum row offsets and total number of nonzeros + cu_info = cusparseXcsrgeam2Nnz(handle, m, n, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total, (*pattern)->getBuffer()); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); + (*pattern)->setNnz(nnz_total); + return info; 
+ } + + /** + * @brief Given sparsity pattern, calculate alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerCuda::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern) + { + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + auto c_v = C->getValues(memory::DEVICE); + auto c_i = C->getRowData(memory::DEVICE); + auto c_j = C->getColData(memory::DEVICE); + + auto handle = workspace_->getCusparseHandle(); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + assert(m == C->getNumRows()); + + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + assert(n == C->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + cusparseMatDescr_t descr_a = pattern->getMatrixDescriptor(); + cusparseStatus_t cu_info = cusparseDcsrgeam2(handle, m, n, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j, pattern->getBuffer()); + info = info || (cu_info != CUSPARSE_STATUS_SUCCESS); + info = info || C->setUpdated(memory::DEVICE); + return info; + } + + /** + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of 
real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) + { + if (A->destroyMatrixData(memory::DEVICE) != 0) + { + return 1; + } + if (A->destroyMatrixData(memory::HOST) != 0) + { + return 1; + } + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::DEVICE, memory::DEVICE); + } + + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCuda::scaleAddI(matrix::Csr* A, real_type alpha) + { + index_type n = A->getNumRows(); + + std::vector I_i(n + 1); + std::iota(I_i.begin(), I_i.end(), 0); + + std::vector I_j(n); + std::iota(I_j.begin(), I_j.end(), 0); + + std::vector I_v(n, 1.0); + + matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); + int info = I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddBufferCUDA* pattern = workspace_->getScaleAddIBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferCUDA* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, alpha, &I, 1., &pattern); + workspace_->setScaleAddIBuffer(pattern); + workspace_->scaleAddISetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, 
pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; + } + + /** + * @brief Multiply csr matrix by a constant and add B. + * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerCuda::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) + { + int info = 0; + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddBSetup()) + { + ScaleAddBufferCUDA* pattern = workspace_->getScaleAddBBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferCUDA* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, alpha, B, 1., &pattern); + workspace_->setScaleAddBBuffer(pattern); + workspace_->scaleAddBSetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index f0efbcee..7441ba1e 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -1,10 +1,16 @@ #pragma once + +#include + #include #include #include namespace ReSolve { + + class ScaleAddBufferCUDA; + namespace vector { class Vector; @@ -45,6 +51,10 @@ namespace ReSolve int rightScale(matrix::Csr* A, 
vector_type* diag) override; + int scaleAddI(matrix::Csr* A, real_type alpha) override; + + int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, @@ -55,6 +65,8 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: + int allocateForSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, ScaleAddBufferCUDA** pattern); + int computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferCUDA* pattern); LinAlgWorkspaceCUDA* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index 17a31bc3..20a28e3a 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -1,6 +1,8 @@ #include "MatrixHandlerHip.hpp" #include +#include +#include #include #include @@ -11,6 +13,7 @@ #include #include #include +#include namespace ReSolve { @@ -369,4 +372,200 @@ namespace ReSolve return 0; } + /** + * @brief Calculate buffer size and sparsity pattern of alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] B - CSR matrix + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerHip::allocateForSum(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferHIP** pattern) + { + auto handle = workspace_->getRocsparseHandle(); + rocsparse_status roc_info = rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host); + int info = (roc_info != rocsparse_status_success); + workspace_->setRocsparseHandle(handle); + + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + index_type n = A->getNumColumns(); + 
assert(n == B->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + *pattern = new ScaleAddBufferHIP(n + 1); + rocsparse_mat_descr descr_a = (*pattern)->getMatrixDescriptor(); + index_type nnz_total; + // determines sum row offsets and total number of nonzeros + roc_info = rocsparse_bsrgeam_nnzb(handle, rocsparse_direction_row, m, n, 1, descr_a, nnz_a, a_i, a_j, descr_a, nnz_b, b_i, b_j, descr_a, (*pattern)->getRowData(), &nnz_total); + info = info || (roc_info != rocsparse_status_success); + (*pattern)->setNnz(nnz_total); + return info; + } + + /** + * @brief Given sparsity pattern, calculate alpha*A + beta*B + * + * @param[in] A - CSR matrix + * @param[in] alpha - constant to be added to A + * @param[in] B - CSR matrix + * @param[in] beta - constant to be added to B + * @param[in, out] pattern - sparsity pattern and buffer + * + * @return int error code, 0 if successful + */ + int MatrixHandlerHip::computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferHIP* pattern) + { + auto a_v = A->getValues(memory::DEVICE); + auto a_i = A->getRowData(memory::DEVICE); + auto a_j = A->getColData(memory::DEVICE); + + auto b_v = B->getValues(memory::DEVICE); + auto b_i = B->getRowData(memory::DEVICE); + auto b_j = B->getColData(memory::DEVICE); + + auto c_v = C->getValues(memory::DEVICE); + auto c_i = C->getRowData(memory::DEVICE); + auto c_j = C->getColData(memory::DEVICE); + + auto handle = workspace_->getRocsparseHandle(); + + index_type m = A->getNumRows(); + assert(m == B->getNumRows()); + assert(m == C->getNumRows()); + + index_type n = A->getNumColumns(); + assert(n == B->getNumColumns()); + assert(n == C->getNumColumns()); + + assert(m == n); + + index_type nnz_a = A->getNnz(); + index_type nnz_b = B->getNnz(); + + int info = mem_.copyArrayDeviceToDevice(c_i, pattern->getRowData(), n + 1); + rocsparse_mat_descr descr_a = pattern->getMatrixDescriptor(); + 
rocsparse_status roc_info = rocsparse_dbsrgeam(handle, rocsparse_direction_row, m, n, 1, &alpha, descr_a, nnz_a, a_v, a_i, a_j, &beta, descr_a, nnz_b, b_v, b_i, b_j, descr_a, c_v, c_i, c_j); + info = info || (roc_info != rocsparse_status_success); + info = info || C->setUpdated(memory::DEVICE); + return info; + } + + /** + * @brief Update matrix with a new number of nonzero elements + * + * @param[in,out] A - Sparse matrix + * @param[in] row_data - pointer to row data (array of integers) + * @param[in] col_data - pointer to column data (array of integers) + * @param[in] val_data - pointer to value data (array of real numbers) + * @param[in] nnz - number of non-zer + * @return 0 if successful, 1 otherwise + */ + static int updateMatrix(matrix::Sparse* A, index_type* rowData, index_type* columnData, real_type* valData, index_type nnz) + { + if (A->destroyMatrixData(memory::DEVICE) != 0) + { + return 1; + } + if (A->destroyMatrixData(memory::HOST) != 0) + { + return 1; + } + return A->copyDataFrom(rowData, columnData, valData, nnz, memory::DEVICE, memory::DEVICE); + } + + /** + * @brief Add a constant to the nonzero values of a csr matrix, + * then add the identity matrix. 
+ * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerHip::scaleAddI(matrix::Csr* A, real_type alpha) + { + index_type n = A->getNumRows(); + + std::vector I_i(n + 1); + std::iota(I_i.begin(), I_i.end(), 0); + + std::vector I_j(n); + std::iota(I_j.begin(), I_j.end(), 0); + + std::vector I_v(n, 1.0); + + matrix::Csr I(A->getNumRows(), A->getNumColumns(), n); + int info = I.copyDataFrom(I_i.data(), I_j.data(), I_v.data(), n, memory::HOST, memory::DEVICE); + + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddISetup()) + { + ScaleAddBufferHIP* pattern = workspace_->getScaleAddIBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferHIP* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, &I, &pattern); + + workspace_->setScaleAddIBuffer(pattern); + workspace_->scaleAddISetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, &I, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; + } + + /** + * @brief Multiply csr matrix by a constant and add B. 
+ * + * @param[in,out] A - Sparse CSR matrix + * @param[in] alpha - constant to the added + * @param[in] B - Sparse CSR matrix + * @return 0 if successful, 1 otherwise + */ + int MatrixHandlerHip::scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) + { + int info = 0; + // Reuse sparsity pattern if it is available + if (workspace_->scaleAddBSetup()) + { + ScaleAddBufferHIP* pattern = workspace_->getScaleAddBBuffer(); + matrix::Csr C(A->getNumRows(), A->getNumColumns(), pattern->getNnz()); + info = info || C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + else + { + ScaleAddBufferHIP* pattern = nullptr; + matrix::Csr C(A->getNumRows(), A->getNumColumns(), A->getNnz()); + info = info || allocateForSum(A, B, &pattern); + workspace_->setScaleAddBBuffer(pattern); + workspace_->scaleAddBSetupDone(); + C.setNnz(pattern->getNnz()); + C.allocateMatrixData(memory::DEVICE); + info = info || computeSum(A, alpha, B, 1., &C, pattern); + info = info || updateMatrix(A, C.getRowData(memory::DEVICE), C.getColData(memory::DEVICE), C.getValues(memory::DEVICE), C.getNnz()); + } + return info; + } + } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp index 7e69c3f1..4f7c092b 100644 --- a/resolve/matrix/MatrixHandlerHip.hpp +++ b/resolve/matrix/MatrixHandlerHip.hpp @@ -17,6 +17,7 @@ namespace ReSolve class Csc; class Csr; } // namespace matrix + class ScaleAddBufferHIP; class LinAlgWorkspaceHIP; } // namespace ReSolve @@ -43,7 +44,12 @@ namespace ReSolve int rightScale(matrix::Csr* A, vector_type* diag) override; - int addConst(matrix::Sparse* A, real_type alpha) override; + int addConst(matrix::Sparse* A, real_type alpha) override; + + int scaleAddI(matrix::Csr* A, real_type alpha) override; + + int scaleAddB(matrix::Csr* A, real_type 
alpha, matrix::Csr* B) override; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, @@ -55,6 +61,9 @@ namespace ReSolve void setValuesChanged(bool isValuesChanged) override; private: + int allocateForSum(matrix::Csr* A, matrix::Csr* B, ScaleAddBufferHIP** pattern); + int computeSum(matrix::Csr* A, real_type alpha, matrix::Csr* B, real_type beta, matrix::Csr* C, ScaleAddBufferHIP* pattern); + LinAlgWorkspaceHIP* workspace_{nullptr}; bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index d874dff1..80fbb631 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -46,6 +46,10 @@ namespace ReSolve virtual int addConst(matrix::Sparse* A, real_type alpha) = 0; + virtual int scaleAddI(matrix::Csr* A, real_type alpha) = 0; + + virtual int scaleAddB(matrix::Csr* A, real_type alpha, matrix::Csr* B) = 0; + virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index d9849e6c..a0760396 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -7,15 +7,22 @@ ]] # C++ code -set(ReSolve_Workspace_SRC LinAlgWorkspaceCpu.cpp) +set(ReSolve_Workspace_SRC ScaleAddBufferCpu.cpp LinAlgWorkspaceCpu.cpp) # C++ code that depends on CUDA SDK libraries -set(ReSolve_Workspace_CUDASDK_SRC LinAlgWorkspaceCUDA.cpp) +set(ReSolve_Workspace_CUDASDK_SRC ScaleAddBufferCUDA.cpp + LinAlgWorkspaceCUDA.cpp +) -set(ReSolve_Workspace_ROCM_SRC LinAlgWorkspaceHIP.cpp) +set(ReSolve_Workspace_ROCM_SRC ScaleAddBufferHIP.cpp LinAlgWorkspaceHIP.cpp) set(ReSolve_Workspace_HEADER_INSTALL - LinAlgWorkspace.hpp LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp + ScaleAddBufferCpu.hpp + ScaleAddBufferCUDA.hpp + ScaleAddBufferHIP.hpp + LinAlgWorkspace.hpp + LinAlgWorkspaceCpu.hpp + LinAlgWorkspaceCUDA.hpp 
LinAlgWorkspaceHIP.hpp ) diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index fdb19c30..57987666 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -1,4 +1,5 @@ #include +#include namespace ReSolve { @@ -8,6 +9,8 @@ namespace ReSolve handle_cusparse_ = nullptr; handle_cublas_ = nullptr; buffer_spmv_ = nullptr; + buffer_scale_add_i = nullptr; + buffer_scale_add_b = nullptr; buffer_1norm_ = nullptr; transpose_workspace_ = nullptr; transpose_workspace_ready_ = false; @@ -21,6 +24,10 @@ namespace ReSolve { if (buffer_spmv_ != nullptr) mem_.deleteOnDevice(buffer_spmv_); + if (buffer_scale_add_i != nullptr) + delete buffer_scale_add_i; + if (buffer_scale_add_b != nullptr) + delete buffer_scale_add_b; if (d_r_size_ != 0) mem_.deleteOnDevice(d_r_); if (norm_buffer_ready_) @@ -70,6 +77,18 @@ namespace ReSolve transpose_workspace_ = nullptr; transpose_workspace_ready_ = false; } + if (scale_add_b_setup_done_) + { + delete buffer_scale_add_b; + buffer_scale_add_b = nullptr; + scale_add_b_setup_done_ = false; + } + if (scale_add_i_setup_done_) + { + delete buffer_scale_add_i; + buffer_scale_add_i = nullptr; + scale_add_i_setup_done_ = false; + } return; } @@ -83,6 +102,16 @@ namespace ReSolve return buffer_1norm_; } + ScaleAddBufferCUDA* LinAlgWorkspaceCUDA::getScaleAddIBuffer() + { + return buffer_scale_add_i; + } + + ScaleAddBufferCUDA* LinAlgWorkspaceCUDA::getScaleAddBBuffer() + { + return buffer_scale_add_b; + } + void* LinAlgWorkspaceCUDA::getTransposeBufferWorkspace() { return transpose_workspace_; @@ -114,6 +143,26 @@ namespace ReSolve buffer_spmv_ = buffer; } + void LinAlgWorkspaceCUDA::setScaleAddBBuffer(ScaleAddBufferCUDA* buffer) + { + buffer_scale_add_b = buffer; + } + + void LinAlgWorkspaceCUDA::setScaleAddIBuffer(ScaleAddBufferCUDA* buffer) + { + buffer_scale_add_i = buffer; + } + + void LinAlgWorkspaceCUDA::scaleAddBSetupDone() + { + scale_add_b_setup_done_ 
= true; + } + + void LinAlgWorkspaceCUDA::scaleAddISetupDone() + { + scale_add_i_setup_done_ = true; + } + void LinAlgWorkspaceCUDA::setNormBuffer(void* buffer) { buffer_1norm_ = buffer; @@ -204,6 +253,16 @@ namespace ReSolve matvec_setup_done_ = true; } + bool LinAlgWorkspaceCUDA::scaleAddISetup() + { + return scale_add_i_setup_done_; + } + + bool LinAlgWorkspaceCUDA::scaleAddBSetup() + { + return scale_add_b_setup_done_; + } + void LinAlgWorkspaceCUDA::initializeHandles() { cusparseCreate(&handle_cusparse_); diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index ac996b9b..01b30f6e 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -8,6 +8,9 @@ namespace ReSolve { + + class ScaleAddBufferCUDA; + class LinAlgWorkspaceCUDA { public: @@ -17,13 +20,19 @@ namespace ReSolve void resetLinAlgWorkspace(); // accessors - void* getSpmvBuffer(); - void* getNormBuffer(); - void* getTransposeBufferWorkspace(); - void setTransposeBufferWorkspace(size_t bufferSize); - bool isTransposeBufferAllocated(); - void setSpmvBuffer(void* buffer); - void setNormBuffer(void* buffer); + void* getSpmvBuffer(); + void* getNormBuffer(); + ScaleAddBufferCUDA* getScaleAddIBuffer(); + ScaleAddBufferCUDA* getScaleAddBBuffer(); + void* getTransposeBufferWorkspace(); + void setTransposeBufferWorkspace(size_t bufferSize); + bool isTransposeBufferAllocated(); + void setSpmvBuffer(void* buffer); + void setNormBuffer(void* buffer); + void setScaleAddIBuffer(ScaleAddBufferCUDA* buffer); + void setScaleAddBBuffer(ScaleAddBufferCUDA* buffer); + void scaleAddISetupDone(); + void scaleAddBSetupDone(); cublasHandle_t getCublasHandle(); cusolverSpHandle_t getCusolverSpHandle(); // needed for 1-norms etc @@ -48,6 +57,9 @@ namespace ReSolve bool matvecSetup(); void matvecSetupDone(); + bool scaleAddISetup(); + bool scaleAddBSetup(); + private: // handles cublasHandle_t handle_cublas_; @@ -62,10 +74,14 @@ 
namespace ReSolve cusparseDnVecDescr_t vec_y_; // buffers - void* buffer_spmv_{nullptr}; - void* buffer_1norm_{nullptr}; + void* buffer_spmv_{nullptr}; + void* buffer_1norm_{nullptr}; + ScaleAddBufferCUDA* buffer_scale_add_i{nullptr}; + ScaleAddBufferCUDA* buffer_scale_add_b{nullptr}; bool matvec_setup_done_{false}; // check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. + bool scale_add_i_setup_done_{false}; + bool scale_add_b_setup_done_{false}; void* transpose_workspace_{nullptr}; // needed for transpose bool transpose_workspace_ready_{false}; // to track if allocated diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp index 36f9d6cf..e632afa9 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.cpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -1,7 +1,11 @@ #include "LinAlgWorkspaceCpu.hpp" +#include +#include #include +#include + namespace ReSolve { LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() @@ -10,6 +14,8 @@ namespace ReSolve LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() { + delete scaleAddIBuffer_; + delete scaleAddBBuffer_; } void LinAlgWorkspaceCpu::initializeHandles() @@ -18,7 +24,59 @@ namespace ReSolve void LinAlgWorkspaceCpu::resetLinAlgWorkspace() { - // No resources to reset in CPU workspace - return; + delete scaleAddIBuffer_; + scaleAddIBuffer_ = nullptr; + scaleAddISetupDone_ = false; + delete scaleAddBBuffer_; + scaleAddBBuffer_ = nullptr; + scaleAddBSetupDone_ = false; + } + + bool LinAlgWorkspaceCpu::scaleAddISetup() + { + return scaleAddISetupDone_; + } + + void LinAlgWorkspaceCpu::scaleAddISetupDone() + { + scaleAddISetupDone_ = true; + } + + ScaleAddBufferCpu* LinAlgWorkspaceCpu::getScaleAddIBuffer() + { + assert(scaleAddIBuffer_ != nullptr); + return scaleAddIBuffer_; + } + + void LinAlgWorkspaceCpu::setScaleAddIBuffer(ScaleAddBufferCpu* buffer) + { + assert(buffer != nullptr); + assert(scaleAddIBuffer_ == nullptr); + scaleAddIBuffer_ = buffer; + 
scaleAddISetupDone(); } + + bool LinAlgWorkspaceCpu::scaleAddBSetup() + { + return scaleAddBSetupDone_; + } + + void LinAlgWorkspaceCpu::scaleAddBSetupDone() + { + scaleAddBSetupDone_ = true; + } + + ScaleAddBufferCpu* LinAlgWorkspaceCpu::getScaleAddBBuffer() + { + assert(scaleAddBBuffer_ != nullptr); + return scaleAddBBuffer_; + } + + void LinAlgWorkspaceCpu::setScaleAddBBuffer(ScaleAddBufferCpu* buffer) + { + assert(scaleAddBBuffer_ == nullptr); + scaleAddBBuffer_ = buffer; + scaleAddBSetupDone(); + } + } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp index 308c2497..7c0b758c 100644 --- a/resolve/workspace/LinAlgWorkspaceCpu.hpp +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -1,14 +1,34 @@ #pragma once +#include + namespace ReSolve { + class ScaleAddBufferCpu; + class LinAlgWorkspaceCpu { public: LinAlgWorkspaceCpu(); ~LinAlgWorkspaceCpu(); - void initializeHandles(); - void resetLinAlgWorkspace(); + void initializeHandles(); + void resetLinAlgWorkspace(); + bool scaleAddISetup(); + void scaleAddISetupDone(); + ScaleAddBufferCpu* getScaleAddIBuffer(); + void setScaleAddIBuffer(ScaleAddBufferCpu* buffer); + bool scaleAddBSetup(); + void scaleAddBSetupDone(); + ScaleAddBufferCpu* getScaleAddBBuffer(); + void setScaleAddBBuffer(ScaleAddBufferCpu* buffer); + + private: + // check if setup is done for scaleAddI i.e. if buffer is allocated, csr structure is set etc. + bool scaleAddISetupDone_{false}; + ScaleAddBufferCpu* scaleAddIBuffer_{nullptr}; + // check if setup is done for scaleAddB i.e. if buffer is allocated, csr structure is set etc. 
+ bool scaleAddBSetupDone_{false}; + ScaleAddBufferCpu* scaleAddBBuffer_{nullptr}; }; } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index fc13e689..1504a42c 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -1,12 +1,16 @@ +#include + #include +#include namespace ReSolve { LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { - handle_rocsparse_ = nullptr; - handle_rocblas_ = nullptr; - + handle_rocsparse_ = nullptr; + handle_rocblas_ = nullptr; + buffer_scale_add_i_ = nullptr; + buffer_scale_add_b_ = nullptr; matvec_setup_done_ = false; d_r_ = nullptr; d_r_size_ = 0; @@ -24,6 +28,16 @@ namespace ReSolve { rocsparse_destroy_mat_descr(mat_A_); } + if (scale_add_i_setup_done_) + { + assert(buffer_scale_add_i_ != nullptr); + delete buffer_scale_add_i_; + } + if (buffer_scale_add_b_ != nullptr) + { + assert(buffer_scale_add_b_ != nullptr); + delete buffer_scale_add_b_; + } if (d_r_size_ != 0) { mem_.deleteOnDevice(d_r_); @@ -52,6 +66,20 @@ namespace ReSolve rocsparse_destroy_mat_descr(mat_A_); matvec_setup_done_ = false; } + if (scale_add_b_setup_done_) + { + assert(buffer_scale_add_b_ != nullptr); + delete buffer_scale_add_b_; + buffer_scale_add_b_ = nullptr; + scale_add_b_setup_done_ = false; + } + if (scale_add_i_setup_done_) + { + assert(buffer_scale_add_i_ != nullptr); + delete buffer_scale_add_i_; + buffer_scale_add_i_ = nullptr; + scale_add_i_setup_done_ = false; + } if (d_r_size_ != 0) { mem_.deleteOnDevice(d_r_); @@ -128,6 +156,36 @@ namespace ReSolve norm_buffer_ = nb; } + ScaleAddBufferHIP* LinAlgWorkspaceHIP::getScaleAddIBuffer() + { + return buffer_scale_add_i_; + } + + ScaleAddBufferHIP* LinAlgWorkspaceHIP::getScaleAddBBuffer() + { + return buffer_scale_add_b_; + } + + void LinAlgWorkspaceHIP::setScaleAddBBuffer(ScaleAddBufferHIP* buffer) + { + buffer_scale_add_b_ = buffer; + } + + void 
LinAlgWorkspaceHIP::setScaleAddIBuffer(ScaleAddBufferHIP* buffer) + { + buffer_scale_add_i_ = buffer; + } + + void LinAlgWorkspaceHIP::scaleAddBSetupDone() + { + scale_add_b_setup_done_ = true; + } + + void LinAlgWorkspaceHIP::scaleAddISetupDone() + { + scale_add_i_setup_done_ = true; + } + void LinAlgWorkspaceHIP::setNormBufferState(bool r) { norm_buffer_ready_ = r; @@ -143,6 +201,16 @@ namespace ReSolve matvec_setup_done_ = true; } + bool LinAlgWorkspaceHIP::scaleAddISetup() + { + return scale_add_i_setup_done_; + } + + bool LinAlgWorkspaceHIP::scaleAddBSetup() + { + return scale_add_b_setup_done_; + } + void LinAlgWorkspaceHIP::initializeHandles() { rocsparse_create_handle(&handle_rocsparse_); diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 4db4b89f..fe08d961 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -9,6 +9,8 @@ namespace ReSolve { + class ScaleAddBufferHIP; + class LinAlgWorkspaceHIP { public: @@ -24,6 +26,10 @@ namespace ReSolve index_type getDrSize(); real_type* getDr(); real_type* getNormBuffer(); + void setScaleAddIBuffer(ScaleAddBufferHIP* buffer); + void setScaleAddBBuffer(ScaleAddBufferHIP* buffer); + ScaleAddBufferHIP* getScaleAddIBuffer(); + ScaleAddBufferHIP* getScaleAddBBuffer(); void* getTransposeBufferWorkspace(); void setTransposeBufferWorkspace(size_t bufferSize); bool getNormBufferState(); @@ -32,12 +38,17 @@ namespace ReSolve void setRocblasHandle(rocblas_handle handle); void setRocsparseHandle(rocsparse_handle handle); void setSpmvMatrixDescriptor(rocsparse_mat_descr mat); + void setSpmvMatrixInfo(rocsparse_mat_info info); void initializeHandles(); bool matvecSetup(); void matvecSetupDone(); + bool scaleAddISetup(); + bool scaleAddBSetup(); + void scaleAddISetupDone(); + void scaleAddBSetupDone(); void setDrSize(index_type new_sz); void setDr(real_type* new_dr); @@ -51,23 +62,26 @@ namespace ReSolve // matrix descriptors 
rocsparse_mat_descr mat_A_; - // vector descriptors not needed, rocsparse uses RAW pointers. // buffers // there is no buffer needed in matvec bool matvec_setup_done_{false}; // check if setup is done for matvec (note: no buffer but there is analysis) + bool scale_add_i_setup_done_{false}; + bool scale_add_b_setup_done_{false}; // info - but we need info rocsparse_mat_info info_A_; - real_type* d_r_{nullptr}; // needed for inf-norm - real_type* norm_buffer_{nullptr}; // needed for inf-norm - void* transpose_workspace_{nullptr}; // needed for transpose - bool transpose_workspace_ready_{false}; // to track if allocated - index_type d_r_size_{0}; - bool norm_buffer_ready_{false}; // to track if allocated - MemoryHandler mem_; ///< Memory handler not needed for now + real_type* d_r_{nullptr}; // needed for inf-norm + real_type* norm_buffer_{nullptr}; // needed for inf-norm + void* transpose_workspace_{nullptr}; // needed for transpose + bool transpose_workspace_ready_{false}; // to track if allocated + index_type d_r_size_{0}; + bool norm_buffer_ready_{false}; // to track if allocated + ScaleAddBufferHIP* buffer_scale_add_i_{nullptr}; + ScaleAddBufferHIP* buffer_scale_add_b_{nullptr}; + MemoryHandler mem_; ///< Memory handler not needed for now }; } // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCUDA.cpp b/resolve/workspace/ScaleAddBufferCUDA.cpp new file mode 100644 index 00000000..7bbd33f4 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCUDA.cpp @@ -0,0 +1,102 @@ +#include + +namespace ReSolve +{ + + /** + * @brief Store sparsity pattern + * + * @param[in] nrows - number of rows + */ + ScaleAddBufferCUDA::ScaleAddBufferCUDA(index_type numRows) + : numRows_(numRows), bufferSize_(0) + { + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + cusparseCreateMatDescr(&mat_A_); + cusparseSetMatType(mat_A_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(mat_A_, CUSPARSE_INDEX_BASE_ZERO); + } + + /** + * @brief Destructor + * + */ + 
ScaleAddBufferCUDA::~ScaleAddBufferCUDA() + { + mem_.deleteOnDevice(rowData_); + mem_.deleteOnDevice(buffer_); + cusparseDestroyMatDescr(mat_A_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferCUDA::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve matrix descriptor + * + * @return matrix descriptor set for scaleAddB, scaleAddI + */ + cusparseMatDescr_t ScaleAddBufferCUDA::getMatrixDescriptor() + { + return mat_A_; + } + + /** + * @brief Allocate memory for cusparse buffer + * + * @param[in] bufferSize calculated array size + */ + void ScaleAddBufferCUDA::allocateBuffer(size_t bufferSize) + { + bufferSize_ = bufferSize; + mem_.allocateBufferOnDevice(&buffer_, bufferSize_); + } + + /** + * @brief Retrieve cusparse buffer + * + * @return cusparse buffer + */ + void* ScaleAddBufferCUDA::getBuffer() + { + return buffer_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferCUDA::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferCUDA::getNnz() + { + return nnz_; + } + + /** + * @brief set number of non-zeros. 
+ * + * @param[in] nnz number of non-zeros + */ + void ScaleAddBufferCUDA::setNnz(index_type nnz) + { + nnz_ = nnz; + } + +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCUDA.hpp b/resolve/workspace/ScaleAddBufferCUDA.hpp new file mode 100644 index 00000000..683eb389 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCUDA.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include "cusparse.h" +#include +#include + +namespace ReSolve +{ + + class ScaleAddBufferCUDA + { + public: + ScaleAddBufferCUDA(index_type numRows); + ~ScaleAddBufferCUDA(); + index_type* getRowData(); + void allocateBuffer(size_t bufferSize); + void* getBuffer(); + cusparseMatDescr_t getMatrixDescriptor(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); + + private: + index_type* rowData_; + void* buffer_; + cusparseMatDescr_t mat_A_; + index_type numRows_; + index_type nnz_; + size_t bufferSize_; + MemoryHandler mem_; + }; + +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCpu.cpp b/resolve/workspace/ScaleAddBufferCpu.cpp new file mode 100644 index 00000000..4f9066e8 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCpu.cpp @@ -0,0 +1,69 @@ +#include +#include +#include + +#include + +namespace ReSolve +{ + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - row data (array of integers, length:nrows+1) + * @param[in] col_data - column data (array of integers, length: nnz) + */ + ScaleAddBufferCpu::ScaleAddBufferCpu(std::vector rowData, std::vector colData) + : rowData_(std::move(rowData)), colData_(std::move(colData)) + { + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferCpu::getRowData() + { + return rowData_.data(); + } + + /** + * @brief Retrieve column sparsity pattern + * + * @return precalculated column indices + */ + index_type* ScaleAddBufferCpu::getColumnData() + { + return colData_.data(); + } + + /** + * @brief get number of 
matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferCpu::getNumRows() + { + return static_cast(rowData_.size()) - 1; + } + + /** + * @brief get number of matrix columns + * + * @return number of matrix columns. + */ + index_type ScaleAddBufferCpu::getNumColumns() + { + return getNumRows(); + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferCpu::getNnz() + { + return static_cast(colData_.size()); + } +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferCpu.hpp b/resolve/workspace/ScaleAddBufferCpu.hpp new file mode 100644 index 00000000..61fd014a --- /dev/null +++ b/resolve/workspace/ScaleAddBufferCpu.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include + +#include + +namespace ReSolve +{ + class ScaleAddBufferCpu + { + public: + ScaleAddBufferCpu(std::vector rowData, std::vector colData); + index_type* getRowData(); + index_type* getColumnData(); + index_type getNumRows(); + index_type getNumColumns(); + index_type getNnz(); + + private: + std::vector rowData_; + std::vector colData_; + }; +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferHIP.cpp b/resolve/workspace/ScaleAddBufferHIP.cpp new file mode 100644 index 00000000..38e19a22 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferHIP.cpp @@ -0,0 +1,81 @@ +#include + +#include + +namespace ReSolve +{ + /** + * @brief Store sparsity pattern + * + * @param[in] row_data - pointer to row data (array of integers, length:nrows+1) + * @param[in] nrows - number of rows + * @param[in] col_data - pointer to column data (array of integers, length: nnz) + * @param[in] nnz - number of non-zeros + */ + ScaleAddBufferHIP::ScaleAddBufferHIP(index_type numRows) + : numRows_(numRows) + { + rocsparse_create_mat_descr(&mat_A_); + mem_.allocateArrayOnDevice(&rowData_, numRows_ + 1); + } + + /** + * @brief Destructor + * + */ + ScaleAddBufferHIP::~ScaleAddBufferHIP() + { + mem_.deleteOnDevice(rowData_); + 
rocsparse_destroy_mat_descr(mat_A_); + } + + /** + * @brief Retrieve row sparsity pattern + * + * @return precalculated row pointers + */ + index_type* ScaleAddBufferHIP::getRowData() + { + return rowData_; + } + + /** + * @brief Retrieve matrix descriptor + * + * @return matrix descriptor set for scaleAddB, scaleAddI + */ + rocsparse_mat_descr ScaleAddBufferHIP::getMatrixDescriptor() + { + return mat_A_; + } + + /** + * @brief get number of matrix rows + * + * @return number of matrix rows. + */ + index_type ScaleAddBufferHIP::getNumRows() + { + return numRows_; + } + + /** + * @brief Get number of non-zeros. + * + * @return number of non-zeros + */ + index_type ScaleAddBufferHIP::getNnz() + { + return nnz_; + } + + /** + * @brief Get number of non-zeros. + * + * @param[in] nnz number of non-zeros + */ + void ScaleAddBufferHIP::setNnz(index_type nnz) + { + nnz_ = nnz; + } +} // namespace ReSolve diff --git a/resolve/workspace/ScaleAddBufferHIP.hpp b/resolve/workspace/ScaleAddBufferHIP.hpp new file mode 100644 index 00000000..29299c00 --- /dev/null +++ b/resolve/workspace/ScaleAddBufferHIP.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace ReSolve +{ + + class ScaleAddBufferHIP + { + public: + ScaleAddBufferHIP(index_type numRows); + ~ScaleAddBufferHIP(); + index_type* getRowData(); + rocsparse_mat_descr getMatrixDescriptor(); + index_type getNumRows(); + void setNnz(index_type nnz); + index_type getNnz(); + + private: + index_type* rowData_; + rocsparse_mat_descr mat_A_; + index_type numRows_; + index_type nnz_; + MemoryHandler mem_; + }; +} // namespace ReSolve diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index e6fb0eac..047856f4 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -202,6 +202,92 @@ namespace ReSolve return status.report(testname.c_str()); } + TestOutcome scaleAddI(index_type n) + { + TestStatus 
status; + std::string testname(__func__); + matrix::Csr* A = createCsrMatrix(n); + real_type val = 2.; + + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*30+1 + status *= verifyScaleAddICsrMatrix(A, 61); + + // run again to reuse sparsity pattern. + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*61+1 + status *= verifyScaleAddICsrMatrix(A, 123); + + delete A; + return status.report(testname.c_str()); + } + + TestOutcome scaleAddIZero(index_type n) + { + TestStatus status; + std::string testname(__func__); + matrix::Csr* A = createCsrEmptyMatrix(n); + real_type val = 2.; + + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 1 + status *= verifyScaleAddICsrMatrix(A, 1); + + // run again to reuse sparsity pattern. + handler_.scaleAddI(A, val, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*1+1 + status *= verifyScaleAddICsrMatrix(A, 3); + + delete A; + return status.report(testname.c_str()); + } + + TestOutcome scaleAddB(index_type n) + { + TestStatus status; + std::string testname(__func__); + matrix::Csr* A = createCsrMatrix(n); + matrix::Csr* B = createCsrMatrix(n); + real_type val = 2.; + + handler_.scaleAddB(A, val, B, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*30+30 + status *= verifyScaleAddICsrMatrix(A, 90); + + // run again to reuse sparsity pattern. 
+ handler_.scaleAddB(A, val, B, memspace_); + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::HOST); + } + // expected sum is 2*90+30 + status *= verifyScaleAddICsrMatrix(A, 210); + + delete A; + delete B; + return status.report(testname.c_str()); + } + private: ReSolve::MatrixHandler& handler_; memory::MemorySpace memspace_{memory::HOST}; @@ -402,8 +488,8 @@ namespace ReSolve * @brief Verify structure of a CSR matrix with preset pattern. * * The sparsity structure corresponds to the CSR representation of a rectangular matrix - * created by createRectangularCscMatrix. - * The sparisty structure is upper bidiagonal if n==m, + * created by createRectangularCsrMatrix. + * The sparsity structure is upper bidiagonal if n==m, * with an extra entry in the first column. * If n>m A_{ij} is nonzero iff i==j, or i+m==j+n * if nallocateMatrixData(memory::HOST); + + index_type* rowptr = A->getRowData(memory::HOST); + + for (index_type i = 0; i < n + 1; ++i) + { + rowptr[i] = 0; + } + A->setUpdated(memory::HOST); + + if (memspace_ == memory::DEVICE) + { + A->syncData(memspace_); + } + return A; + } + /** * @brief Create a CSR matrix with preset sparsity structure * @@ -678,6 +795,56 @@ namespace ReSolve return true; } + /** + * @brief Verify structure of a CSR matrix with preset pattern after scaleAddI + * + * The sparsity structure corresponds to the CSR representation of a square matrix + * created by createCsrMatrix, scaled by a constant and 1. added along the diagonal. 
+ * + * @pre A is a valid, allocated CSR matrix + * @invariant A + * @param[in] A matrix::Csr* pointer to the matrix to be verified + * @param[in] expected sum of row elements + * @return bool true if the matrix is valid, false otherwise + */ + bool verifyScaleAddICsrMatrix(matrix::Csr* A, real_type expected) + { + // Check if the matrix is valid + if (A == nullptr) + { + return false; + } + + // Get matrix dimensions + const index_type n = A->getNumRows(); + const index_type m = A->getNumColumns(); + + if (n != m) + { + return false; + } + + index_type* scaled_row_ptr = A->getRowData(memory::HOST); + real_type* scaled_value = A->getValues(memory::HOST); + + // Verify values - each element scaled by scale. Diagonal elements should be 1. + for (index_type i = 0; i < n; ++i) + { + real_type sum = 0.; + for (index_type j = scaled_row_ptr[i]; j < scaled_row_ptr[i + 1]; ++j) + { + sum += scaled_value[j]; + } + if (sum != expected) + { + std::cout << "Mismatch at row " << i << ": scaled sum = " << sum << ", expected sum = " << expected << "\n"; + return false; + } + } + + return true; + } + /** * @brief Verify structure of a CSR matrix with preset pattern that is right-scaled * diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index 84e9a601..5f6e7dcd 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -76,6 +76,13 @@ void runTests(const std::string& backend, ReSolve::tests::TestingResults& result result += test.rightScale(1024, 2048); workspace.resetLinAlgWorkspace(); result += test.rightScale(2048, 1024); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddI(100); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddIZero(100); + workspace.resetLinAlgWorkspace(); + result += test.scaleAddB(100); + workspace.resetLinAlgWorkspace(); std::cout << "\n"; }