imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cuMatrixOperations< DataType > Class Template Reference

A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static. More...

#include <cu_matrix_operations.h>

Static Public Member Functions

static void dense_matvec (cublasHandle_t cublas_handle, const DataType *RESTRICT A, const DataType *RESTRICT b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *RESTRICT c)
 Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.
 
static void dense_matvec_plus (cublasHandle_t cublas_handle, const DataType *RESTRICT A, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *RESTRICT c)
 Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.
 
static void dense_transposed_matvec (cublasHandle_t cublas_handle, const DataType *RESTRICT A, const DataType *RESTRICT b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *RESTRICT c)
 Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).
 
static void dense_transposed_matvec_plus (cublasHandle_t cublas_handle, const DataType *RESTRICT A, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).
 
static void csr_matvec (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_column_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const LongIndexType num_rows, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csr_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_column_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_rows, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csr_transposed_matvec (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_column_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csr_transposed_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_column_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csc_matvec (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_row_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csc_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_row_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csc_transposed_matvec (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_row_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void csc_transposed_matvec_plus (cusparseHandle_t cusparse_handle, const DataType *RESTRICT A_data, const LongIndexType *RESTRICT A_row_indices, const LongIndexType *RESTRICT A_index_pointer, const DataType *RESTRICT b, const DataType alpha, const LongIndexType num_columns, DataType *RESTRICT c)
 Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.
 
static void create_band_matrix (cusparseHandle_t cublas_handle, const DataType *RESTRICT diagonals, const DataType *RESTRICT supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
 Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diagonal array (supdiagonals).
 

Detailed Description

template<typename DataType>
class cuMatrixOperations< DataType >

A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static.

This class implements matrix-vector multiplication for three types of matrices:

  • Dense matrix (both row major and column major)
  • Compressed sparse row matrix (CSR)
  • Compressed sparse column matrix (CSC)

For each of the above matrix types, there are four kinds of matrix vector multiplications implemented.

  1. dot : performs \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \).
  2. dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \).
  3. transpose_dot : performs \( \boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \).
  4. transpose_dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \).
See also
cuVectorOperations

Definition at line 86 of file cu_matrix_operations.h.

Member Function Documentation

◆ create_band_matrix()

template<typename DataType >
void cuMatrixOperations< DataType >::create_band_matrix ( cusparseHandle_t  cublas_handle,
const DataType *RESTRICT  diagonals,
const DataType *RESTRICT  supdiagonals,
const IndexType  non_zero_size,
const FlagType  tridiagonal,
DataType **  matrix 
)
static

Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diagonal array (supdiagonals).

The output is written in place (in matrix). The output is only written up to the non_zero_size element, that is: matrix[:non_zero_size,:non_zero_size] is filled, and the rest is assumed to be zero.

Depending on tridiagonal, the matrix is upper bi-diagonal or symmetric tri-diagonal.

Parameters
[in]cublas_handleThe cuSparse object handle.
[in]diagonalsAn array of length n. All elements of diagonals form the main diagonal of matrix.
[in]supdiagonalsAn array of length n. Elements supdiagonals[0:-1] create the upper off-diagonal of matrix, making matrix an upper bi-diagonal matrix. In addition, if tridiagonal is set to 1, the lower off-diagonal is also created similar to the upper off-diagonal, making matrix a symmetric tri-diagonal matrix.
[in]non_zero_sizeOnly the leading block matrix[:non_zero_size,:non_zero_size] of matrix is written. At most, non_zero_size can be n, which is the size of the diagonals array and the size of the square matrix. If non_zero_size is less than n, it is because either diagonals or supdiagonals has zero elements after the non_zero_size-th element (possibly due to early termination of the Lanczos iterations).
[in]tridiagonalBoolean. If set to 0, the matrix becomes upper bi-diagonal. If set to 1, the matrix becomes symmetric tri-diagonal.
[out]matrixA 2D matrix (written in place) of the shape (n,n). This is the output of this function. This matrix is assumed to be initialized to zero before calling this function.

Definition at line 783 of file cu_matrix_operations.cu.

790{
791 throw std::runtime_error("Function not implemented.");
792}

◆ csc_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_row_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]num_rowsNumber of rows of the matrix A.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector whose size is the number of rows of A. This array is written in-place.

Definition at line 574 of file cu_matrix_operations.cu.

583{
584 throw std::runtime_error("Function not implemented.");
585}

◆ csc_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_row_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector whose size is the number of rows of A. This array is written in-place.

Definition at line 626 of file cu_matrix_operations.cu.

636{
637 throw std::runtime_error("Function not implemented.");
638}

◆ csc_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_transposed_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_row_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector whose size is the number of rows of A.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector whose size is the number of columns of A. This array is written in-place.

Definition at line 674 of file cu_matrix_operations.cu.

682{
683 throw std::runtime_error("Function not implemented.");
684}

◆ csc_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csc_transposed_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_row_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_row_indicesCSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector whose size is the number of rows of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_columnsNumber of columns of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector whose size is the number of columns of A. This array is written in-place.

Definition at line 723 of file cu_matrix_operations.cu.

732{
733 throw std::runtime_error("Function not implemented.");
734}

◆ csr_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_column_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const LongIndexType  num_rows,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[out]cOutput column vector whose size is the number of rows of A. This array is written in-place.

Definition at line 376 of file cu_matrix_operations.cu.

384{
385 throw std::runtime_error("Function not implemented.");
386}

◆ csr_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_column_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_rows,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector with same size of the number of columns of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in,out]cOutput column vector whose size is the number of rows of A. This array is written in-place.

Definition at line 425 of file cu_matrix_operations.cu.

434{
435 throw std::runtime_error("Function not implemented.");
436}

◆ csr_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_transposed_matvec ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_column_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector whose size is the number of rows of A.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in]num_columnsNumber of columns of the matrix A.
[out]cOutput column vector whose size is the number of columns of A. This array is written in-place.

Definition at line 473 of file cu_matrix_operations.cu.

482{
483 throw std::runtime_error("Function not implemented.");
484}

◆ csr_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::csr_transposed_matvec_plus ( cusparseHandle_t  cusparse_handle,
const DataType *RESTRICT  A_data,
const LongIndexType *RESTRICT  A_column_indices,
const LongIndexType *RESTRICT  A_index_pointer,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters
[in]cusparse_handleThe cuSparse object handle.
[in]A_dataCSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_column_indicesCSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]A_index_pointerCSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is 0, and the last element is the nnz of the matrix.
[in]bColumn vector whose size is the number of rows of A.
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of the matrix A. This is essentially the size of A_index_pointer array minus one.
[in]num_columnsNumber of columns of the matrix A.
[in,out]cOutput column vector whose size is the number of columns of A. This array is written in-place.

Definition at line 525 of file cu_matrix_operations.cu.

535{
536 throw std::runtime_error("Function not implemented.");
537}

◆ dense_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_matvec ( cublasHandle_t  cublas_handle,
const DataType *RESTRICT  A,
const DataType *RESTRICT  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *RESTRICT  c 
)
static

Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the A_is_row_major flag.
[in]bColumn vector
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[out]cThe output column vector (written in-place).

Definition at line 58 of file cu_matrix_operations.cu.

66{
67 cublasOperation_t trans;
68 int m;
69 int n;
70 int lda;
71 DataType alpha = cu_arithmetics::cast<float, DataType>(1.0f);
72 DataType beta = cu_arithmetics::cast<float, DataType>(0.0f);
73 int incb = 1;
74 int incc = 1;
75
76 // Since cublas accepts column major (Fortran) ordering, use transpose for
77 // row_major matrix.
78 if (A_is_row_major)
79 {
80 // A is row-major, not compatible with cublas. Use transpose instead.
81 trans = CUBLAS_OP_T;
82 m = num_columns;
83 n = num_rows;
84 }
85 else
86 {
87 // A is column-major, compatible with cublas.
88 trans = CUBLAS_OP_N;
89 m = num_rows;
90 n = num_columns;
91 }
92
93 lda = m;
94
95 // Calling cublas
96 cublasStatus_t status = cublas_api::cublasXgemv<DataType>(
97 cublas_handle, trans, m, n, &alpha, A, lda, b, incb, &beta, c,
98 incc);
99
100 assert(status == CUBLAS_STATUS_SUCCESS);
101}
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.

References cu_arithmetics::abs().

Referenced by cuDenseMatrix< DataType >::dot().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_matvec_plus ( cublasHandle_t  cublas_handle,
const DataType *RESTRICT  A,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *RESTRICT  c 
)
static

Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the A_is_row_major flag.
[in]bColumn vector
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[in,out]cThe output column vector (written in-place).

Definition at line 136 of file cu_matrix_operations.cu.

145{
146 DataType zero = cu_arithmetics::cast<float, DataType>(0.0f);
147 if (cu_arithmetics::is_equal(alpha, zero))
148 {
149 return;
150 }
151
152 cublasOperation_t trans;
153 int m;
154 int n;
155 int lda;
156 DataType beta = cu_arithmetics::cast<float, DataType>(1.0f);
157 int incb = 1;
158 int incc = 1;
159
160 // Since cublas accepts column major (Fortran) ordering, use transpose for
161 // row_major matrix.
162 if (A_is_row_major)
163 {
164 trans = CUBLAS_OP_T;
165 m = num_columns;
166 n = num_rows;
167 }
168 else
169 {
170 trans = CUBLAS_OP_N;
171 m = num_rows;
172 n = num_columns;
173 }
174
175 lda = m;
176
177 // Calling cublas
178 cublasStatus_t status = cublas_api::cublasXgemv<DataType>(
179 cublas_handle, trans, m, n, &alpha, A, lda, b, incb, &beta, c,
180 incc);
181
182 assert(status == CUBLAS_STATUS_SUCCESS);
183}
bool is_equal(DataType x, DataType y)
Check if two floating point numbers are equal within a tolerance.

References cu_arithmetics::abs(), and cu_arithmetics::is_equal().

Referenced by cuDenseMatrix< DataType >::dot_plus().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_transposed_matvec()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_transposed_matvec ( cublasHandle_t  cublas_handle,
const DataType *RESTRICT  A,
const DataType *RESTRICT  b,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *RESTRICT  c 
)
static

Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the A_is_row_major flag.
[in]bColumn vector
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[out]cThe output column vector (written in-place).

Definition at line 217 of file cu_matrix_operations.cu.

225{
226 cublasOperation_t trans;
227 int m;
228 int n;
229 int lda;
230 DataType alpha = cu_arithmetics::cast<float, DataType>(1.0f);
231 DataType beta = cu_arithmetics::cast<float, DataType>(0.0f);
232 int incb = 1;
233 int incc = 1;
234
235 // Since cublas accepts column major (Fortran) ordering, use non-transpose
236 // for row_major matrix.
237 if (A_is_row_major)
238 {
239 trans = CUBLAS_OP_N;
240 m = num_columns;
241 n = num_rows;
242 }
243 else
244 {
245 trans = CUBLAS_OP_T;
246 m = num_rows;
247 n = num_columns;
248 }
249
250 lda = m;
251
252 // Calling cublas
253 cublasStatus_t status = cublas_api::cublasXgemv<DataType>(
254 cublas_handle, trans, m, n, &alpha, A, lda, b, incb, &beta, c,
255 incc);
256
257 assert(status == CUBLAS_STATUS_SUCCESS);
258}

References cu_arithmetics::abs().

Referenced by cuDenseMatrix< DataType >::transpose_dot().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dense_transposed_matvec_plus()

template<typename DataType >
void cuMatrixOperations< DataType >::dense_transposed_matvec_plus ( cublasHandle_t  cublas_handle,
const DataType *RESTRICT  A,
const DataType *RESTRICT  b,
const DataType  alpha,
const LongIndexType  num_rows,
const LongIndexType  num_columns,
const FlagType  A_is_row_major,
DataType *RESTRICT  c 
)
static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

Parameters
[in]cublas_handleThe cuBLAS object handle.
[in]A1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the A_is_row_major flag.
[in]bColumn vector
[in]alphaA scalar that scales the matrix vector multiplication.
[in]num_rowsNumber of rows of A
[in]num_columnsNumber of columns of A
[in]A_is_row_majorBoolean, can be 0 or 1 as follows:
  • If A is row major (C ordering where the last index is contiguous) this value should be 1.
  • If A is column major (Fortran ordering where the first index is contiguous), this value should be set to 0.
[in,out]cThe output column vector (written in-place).

Definition at line 294 of file cu_matrix_operations.cu.

303{
304 DataType zero = cu_arithmetics::cast<float, DataType>(0.0f);
305 if (cu_arithmetics::is_equal(alpha, zero))
306 {
307 return;
308 }
309
310 cublasOperation_t trans;
311 int m;
312 int n;
313 int lda;
314 DataType beta = cu_arithmetics::cast<float, DataType>(0.0f);
315 int incb = 1;
316 int incc = 1;
317
318 // Since cublas accepts column major (Fortran) ordering, use non-transpose
319 // for row_major matrix.
320 if (A_is_row_major)
321 {
322 trans = CUBLAS_OP_N;
323 m = num_columns;
324 n = num_rows;
325 }
326 else
327 {
328 trans = CUBLAS_OP_T;
329 m = num_rows;
330 n = num_columns;
331 }
332
333 lda = m;
334
335 // Calling cublas
336 cublasStatus_t status = cublas_api::cublasXgemv<DataType>(
337 cublas_handle, trans, m, n, &alpha, A, lda, b, incb, &beta, c,
338 incc);
339
340 assert(status == CUBLAS_STATUS_SUCCESS);
341}

References cu_arithmetics::abs(), and cu_arithmetics::is_equal().

Referenced by cuDenseMatrix< DataType >::transpose_dot_plus().

Here is the call graph for this function:
Here is the caller graph for this function:

The documentation for this class was generated from the following files: