imate
C++/CUDA Reference
cuMatrixOperations< DataType > Class Template Reference

A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static. More...

#include <cu_matrix_operations.h>

Static Public Member Functions

static void dense_matvec (cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
 Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix. More...
 
static void dense_matvec_plus (cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
 Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix. More...
 
static void dense_transposed_matvec (cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
 Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \). More...
 
static void dense_transposed_matvec_plus (cublasHandle_t cublas_handle, const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \). More...
 
static void csr_matvec (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, DataType *c)
 Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csr_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csr_transposed_matvec (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
 Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csr_transposed_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csc_matvec (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
 Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csc_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csc_transposed_matvec (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_columns, DataType *c)
 Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void csc_transposed_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...
 
static void create_band_matrix (cusparseHandle_t cublas_handle, const DataType *diagonals, const DataType *supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
 Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diagonal array (supdiagonals). More...
 

Detailed Description

template<typename DataType>
class cuMatrixOperations< DataType >

A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static.

This class implements matrix-vector multiplication for three types of matrices:

  • Dense matrix (both row major and column major)
  • Compressed sparse row matrix (CSR)
  • Compressed sparse column matrix (CSC)

For each of the above matrix types, there are four kinds of matrix vector multiplications implemented.

  1. dot : performs \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \).
  2. dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \).
  3. transpose_dot : performs \( \boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \).
  4. transpose_dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \).
See also
cuVectorOperations

Definition at line 58 of file cu_matrix_operations.h.

Member Function Documentation

◆ create_band_matrix()

template<typename DataType >
void cuMatrixOperations< DataType >::create_band_matrix ( cusparseHandle_t  cusparse_handle,
const DataType *  diagonals,
const DataType *  supdiagonals,
const IndexType  non_zero_size,
const FlagType  tridiagonal,
DataType **  matrix 
)
static

Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diagonal array (supdiagonals).

The output is written in place (in matrix). The output is only written up to the non_zero_size element, that is: matrix[:non_zero_size,:non_zero_size] is filled, and the rest is assumed to be zero.

Depending on tridiagonal, the matrix is upper bi-diagonal or symmetric tri-diagonal.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]diagonalsAn array of length n. All elements diagonals create the diagonals of matrix.
[in]supdiagonalsAn array of length n. Elements supdiagonals[0:-1] create the upper off-diagonal of matrix, making matrix an upper bi-diagonal matrix. In addition, if tridiagonal is set to 1, the lower off-diagonal is also created similar to the upper off-diagonal, making matrix a symmetric tri-diagonal matrix.
[in]non_zero_sizeUp to the matrix[:non_zero_size,:non_zero_size] of matrix will be written. At most, non_zero_size can be n, which is the size of diagonals array and the size of the square matrix. If non_zero_size is less than n, it is due to the fact that either diagonals or supdiagonals has zero elements after the size element (possibly due to early termination of Lanczos iterations method).
[in]tridiagonalBoolean. If set to 0, the matrix T becomes upper bi-diagonal. If set to 1, the matrix becomes symmetric tri-diagonal.
[out]matrixA 2D matrix (written in place) of the shape (n,n). This is the output of this function. This matrix is assumed to be initialized to zero before calling this function.

Definition at line 916 of file cu_matrix_operations.cu.

923 {
924  for (IndexType j=0; j < non_zero_size; ++j)
925  {
926  // Diagonals
927  matrix[j][j] = diagonals[j];
928 
929  // Off diagonals
930  if (j < non_zero_size-1)
931  {
932  // Sup-diagonal
933  matrix[j][j+1] = supdiagonals[j];
934 
935  // Sub-diagonal, making symmetric tri-diagonal matrix
936  if (tridiagonal)
937  {
938  matrix[j+1][j] = supdiagonals[j];
939  }
940  }
941  }
942 }
int IndexType
Definition: types.h:65

◆ csc_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_row_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]num_rowsNumber of rows of the matrix A.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 633 of file cu_matrix_operations.cu.

642 {
643  LongIndexType index_pointer;
644  LongIndexType row;
645  LongIndexType column;
646 
647  // Initialize output to zero
648  for (row=0; row < num_rows; ++row)
649  {
650  c[row] = 0.0;
651  }
652 
653  for (column=0; column < num_columns; ++column)
654  {
655  for (index_pointer=A_index_pointer[column];
656  index_pointer < A_index_pointer[column+1];
657  ++index_pointer)
658  {
659  row = A_row_indices[index_pointer];
660  c[row] += A_data[index_pointer] * b[column];
661  }
662  }
663 }
int LongIndexType
Definition: types.h:60

◆ csc_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_row_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 704 of file cu_matrix_operations.cu.

714 {
715  if (alpha == 0.0)
716  {
717  return;
718  }
719 
720  LongIndexType index_pointer;
721  LongIndexType row;
722  LongIndexType column;
723 
724  for (column=0; column < num_columns; ++column)
725  {
726  for (index_pointer=A_index_pointer[column];
727  index_pointer < A_index_pointer[column+1];
728  ++index_pointer)
729  {
730  row = A_row_indices[index_pointer];
731  c[row] += alpha * A_data[index_pointer] * b[column];
732  }
733  }
734 }

◆ csc_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_transposed_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_row_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 770 of file cu_matrix_operations.cu.

778 {
779  LongIndexType index_pointer;
780  LongIndexType row;
781  LongIndexType column;
782  DataType sum;
783 
784  for (column=0; column < num_columns; ++column)
785  {
786  sum = 0.0;
787  for (index_pointer=A_index_pointer[column];
788  index_pointer < A_index_pointer[column+1];
789  ++index_pointer)
790  {
791  row = A_row_indices[index_pointer];
792  sum += A_data[index_pointer] * b[row];
793  }
794  c[column] = sum;
795  }
796 }

◆ csc_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_transposed_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_row_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 835 of file cu_matrix_operations.cu.

844 {
845  if (alpha == 0.0)
846  {
847  return;
848  }
849 
850  LongIndexType index_pointer;
851  LongIndexType row;
852  LongIndexType column;
853  DataType sum;
854 
855  for (column=0; column < num_columns; ++column)
856  {
857  sum = 0.0;
858  for (index_pointer=A_index_pointer[column];
859  index_pointer < A_index_pointer[column+1];
860  ++index_pointer)
861  {
862  row = A_row_indices[index_pointer];
863  sum += A_data[index_pointer] * b[row];
864  }
865  c[column] += alpha * sum;
866  }
867 }

◆ csr_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_column_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const LongIndexType  num_rows,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 361 of file cu_matrix_operations.cu.

369 {
370  LongIndexType index_pointer;
371  LongIndexType row;
372  LongIndexType column;
373  DataType sum;
374 
375  for (row=0; row < num_rows; ++row)
376  {
377  sum = 0.0;
378  for (index_pointer=A_index_pointer[row];
379  index_pointer < A_index_pointer[row+1];
380  ++index_pointer)
381  {
382  column = A_column_indices[index_pointer];
383  sum += A_data[index_pointer] * b[column];
384  }
385  c[row] = sum;
386  }
387 }

◆ csr_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_column_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_rows,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 426 of file cu_matrix_operations.cu.

435 {
436  if (alpha == 0.0)
437  {
438  return;
439  }
440 
441  LongIndexType index_pointer;
442  LongIndexType row;
443  LongIndexType column;
444  DataType sum;
445 
446  for (row=0; row < num_rows; ++row)
447  {
448  sum = 0.0;
449  for (index_pointer=A_index_pointer[row];
450  index_pointer < A_index_pointer[row+1];
451  ++index_pointer)
452  {
453  column = A_column_indices[index_pointer];
454  sum += A_data[index_pointer] * b[column];
455  }
456  c[row] += alpha * sum;
457  }
458 }

◆ csr_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_transposed_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_column_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in]num_columnsNumber of columns of the matrix A.
[out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 495 of file cu_matrix_operations.cu.

504 {
505  LongIndexType index_pointer;
506  LongIndexType row;
507  LongIndexType column;
508 
509  // Initialize output to zero
510  for (column=0; column < num_columns; ++column)
511  {
512  c[column] = 0.0;
513  }
514 
515  for (row=0; row < num_rows; ++row)
516  {
517  for (index_pointer=A_index_pointer[row];
518  index_pointer < A_index_pointer[row+1];
519  ++index_pointer)
520  {
521  column = A_column_indices[index_pointer];
522  c[column] += A_data[index_pointer] * b[row];
523  }
524  }
525 }

◆ csr_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_transposed_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *  A_data,
const LongIndexType A_column_indices,
const LongIndexType A_index_pointer,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in]num_columnsNumber of columns of the matrix A.
[in,out]cOutput column vector with the same size as b. This array is written in-place.

Definition at line 566 of file cu_matrix_operations.cu.

576 {
577  if (alpha == 0.0)
578  {
579  return;
580  }
581 
582  LongIndexType index_pointer;
583  LongIndexType row;
584  LongIndexType column;
585 
586  for (row=0; row < num_rows; ++row)
587  {
588  for (index_pointer=A_index_pointer[row];
589  index_pointer < A_index_pointer[row+1];
590  ++index_pointer)
591  {
592  column = A_column_indices[index_pointer];
593  c[column] += alpha * A_data[index_pointer] * b[row];
594  }
595  }
596 }

◆ dense_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_matvec ( cublasHandle_t  cublas_handle,
const DataType *  A,
const DataType *  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *  c 
)
static

Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should de defined by A_is_row_major flag.
[in]bColumn vector
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[out]cThe output column vector (written in-place).

Definition at line 52 of file cu_matrix_operations.cu.

60 {
61  cublasOperation_t trans;
62  int m;
63  int n;
64  int lda;
65  DataType alpha = 1.0;
66  DataType beta = 0.0;
67  int incb = 1;
68  int incc = 1;
69 
70  // Since cublas accepts column major (Fortran) ordering, use transpose for
71  // row_major matrix.
72  if (A_is_row_major)
73  {
74  trans = CUBLAS_OP_T;
75  m = num_columns;
76  n = num_rows;
77  }
78  else
79  {
80  trans = CUBLAS_OP_N;
81  m = num_rows;
82  n = num_columns;
83  }
84 
85  lda = m;
86 
87  // Calling cublas
88  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
89  m, n, &alpha, A, lda,
90  b, incb, &beta, c,
91  incc);
92  assert(status == CUBLAS_STATUS_SUCCESS);
93 }
cublasStatus_t cublasXgemv(cublasHandle_t handle, cublasOperation_t trans, int m, int n, const DataType *alpha, const DataType *A, int lda, const DataType *x, int incx, const DataType *beta, DataType *y, int incy)

References cublas_interface::cublasXgemv().

Referenced by cuDenseMatrix< DataType >::dot().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_matvec_plus ( cublasHandle_t  cublas_handle,
const DataType *  A,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *  c 
)
static

Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should de defined by A_is_row_major flag.
[in]bColumn vector
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[in,out]cThe output column vector (written in-place).

Definition at line 128 of file cu_matrix_operations.cu.

137 {
138  cublasOperation_t trans;
139  int m;
140  int n;
141  int lda;
142  DataType beta = 1.0;
143  int incb = 1;
144  int incc = 1;
145 
146  // Since cublas accepts column major (Fortran) ordering, use transpose for
147  // row_major matrix.
148  if (A_is_row_major)
149  {
150  trans = CUBLAS_OP_T;
151  m = num_columns;
152  n = num_rows;
153  }
154  else
155  {
156  trans = CUBLAS_OP_N;
157  m = num_rows;
158  n = num_columns;
159  }
160 
161  lda = m;
162 
163  // Calling cublas
164  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
165  m, n, &alpha, A, lda,
166  b, incb, &beta, c,
167  incc);
168  assert(status == CUBLAS_STATUS_SUCCESS);
169 }

References cublas_interface::cublasXgemv().

Referenced by cuDenseMatrix< DataType >::dot_plus().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_transposed_matvec ( cublasHandle_t  cublas_handle,
const DataType *  A,
const DataType *  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *  c 
)
static

Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should de defined by A_is_row_major flag.
[in]bColumn vector
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • f A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[out]cThe output column vector (written in-place).

Definition at line 203 of file cu_matrix_operations.cu.

211 {
212  cublasOperation_t trans;
213  int m;
214  int n;
215  int lda;
216  DataType alpha = 1.0;
217  DataType beta = 0.0;
218  int incb = 1;
219  int incc = 1;
220 
221  // Since cublas accepts column major (Fortran) ordering, use non-transpose
222  // for row_major matrix.
223  if (A_is_row_major)
224  {
225  trans = CUBLAS_OP_N;
226  m = num_columns;
227  n = num_rows;
228  }
229  else
230  {
231  trans = CUBLAS_OP_T;
232  m = num_rows;
233  n = num_columns;
234  }
235 
236  lda = m;
237 
238  // Calling cublas
239  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
240  m, n, &alpha, A, lda,
241  b, incb, &beta, c,
242  incc);
243  assert(status == CUBLAS_STATUS_SUCCESS);
244 }

References cublas_interface::cublasXgemv().

Referenced by cuDenseMatrix< DataType >::transpose_dot().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_transposed_matvec_plus ( cublasHandle_t  cublas_handle,
const DataType *  A,
const DataType *  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should de defined by A_is_row_major flag.
[in]bColumn vector
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • f A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[in,out]cThe output column vector (written in-place).

Definition at line 280 of file cu_matrix_operations.cu.

289 {
290  if (alpha == 0.0)
291  {
292  return;
293  }
294 
295  cublasOperation_t trans;
296  int m;
297  int n;
298  int lda;
299  DataType beta = 0.0;
300  int incb = 1;
301  int incc = 1;
302 
303  // Since cublas accepts column major (Fortran) ordering, use non-transpose
304  // for row_major matrix.
305  if (A_is_row_major)
306  {
307  trans = CUBLAS_OP_N;
308  m = num_columns;
309  n = num_rows;
310  }
311  else
312  {
313  trans = CUBLAS_OP_T;
314  m = num_rows;
315  n = num_columns;
316  }
317 
318  lda = m;
319 
320  // Calling cublas
321  cublasStatus_t status = cublas_interface::cublasXgemv(cublas_handle, trans,
322  m, n, &alpha, A, lda,
323  b, incb, &beta, c,
324  incc);
325  assert(status == CUBLAS_STATUS_SUCCESS);
326 }

References cublas_interface::cublasXgemv().

Referenced by cuDenseMatrix< DataType >::transpose_dot_plus().

Here is the call graph for this function:
Here is the caller graph for this function:

The documentation for this class was generated from the following files: