A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static. More...

#include <c_matrix_operations.h>

Static Public Member Functions
static void	dense_matvec (const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
	Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix. More...

static void	dense_matvec_plus (const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
	Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix. More...

static void	dense_transposed_matvec (const DataType *A, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
	Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \). More...

static void	dense_transposed_matvec_plus (const DataType *A, const DataType *b, const DataType alpha, const LongIndexType num_rows, const LongIndexType num_columns, const FlagType A_is_row_major, DataType *c)
	Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \). More...

static void	csr_matvec (const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, DataType *c)
	Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csr_matvec_plus (const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
	Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csr_transposed_matvec (const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
	Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csr_transposed_matvec_plus (const DataType *A_data, const LongIndexType *A_column_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_rows, DataType *c)
	Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csc_matvec (const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_rows, const LongIndexType num_columns, DataType *c)
	Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csc_matvec_plus (const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
	Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csc_transposed_matvec (const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const LongIndexType num_columns, DataType *c)
	Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	csc_transposed_matvec_plus (const DataType *A_data, const LongIndexType *A_row_indices, const LongIndexType *A_index_pointer, const DataType *b, const DataType alpha, const LongIndexType num_columns, DataType *c)
	Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector. More...

static void	create_band_matrix (const DataType *diagonals, const DataType *supdiagonals, const IndexType non_zero_size, const FlagType tridiagonal, DataType **matrix)
	Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (`diagonals`) and off-diagonal array (`supdiagonals`). More...

Detailed Description

template<typename DataType>
class cMatrixOperations< DataType >

A static class for matrix-vector operations, which are similar to the level-2 operations of the BLAS library. This class acts as a templated namespace, where all member methods are public and static.

This class implements matrix-vector multiplication for three types of matrices:

Dense matrix (both row major and column major)
Compressed sparse row matrix (CSR)
Compressed sparse column matrix (CSC)

For each of the above matrix types, there are four kinds of matrix vector multiplications implemented.

dot : performs \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \).
dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \).
transpose_dot : performs \( \boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \).
transpose_dot_plus : performs \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \).

See also: cVectorOperations

Definition at line 56 of file c_matrix_operations.h.

Member Function Documentation

◆ create_band_matrix()

template<typename DataType >

void cMatrixOperations< DataType >::create_band_matrix	(	const DataType *	diagonals,
		const DataType *	supdiagonals,
		const IndexType	non_zero_size,
		const FlagType	tridiagonal,
		DataType **	matrix
	)

static

Creates bi-diagonal or symmetric tri-diagonal matrix from the diagonal array (diagonals) and off-diagonal array (supdiagonals).

The output is written in place (in matrix). The output is only written up to the non_zero_size element, that is: matrix[:non_zero_size,:non_zero_size] is filled, and the rest is assumed to be zero.

Depending on tridiagonal, the matrix is upper bi-diagonal or symmetric tri-diagonal.

Parameters

[in]	diagonals	An array of length `n`. All elements `diagonals` create the diagonals of `matrix`.
[in]	supdiagonals	An array of length `n`. Elements `supdiagonals`[0:-1] create the upper off-diagonal of `matrix`, making `matrix` an upper bi-diagonal matrix. In addition, if `tridiagonal` is set to `1`, the lower off-diagonal is also created similar to the upper off-diagonal, making `matrix` a symmetric tri-diagonal matrix.
[in]	non_zero_size	Only the leading block `matrix`[:non_zero_size,:non_zero_size] of `matrix` is written. At most, `non_zero_size` can be `n`, which is the size of the `diagonals` array and the size of the square matrix. If `non_zero_size` is less than `n`, it is because either `diagonals` or `supdiagonals` has zero elements after the `non_zero_size`-th element (possibly due to early termination of the Lanczos iterations).
[in]	tridiagonal	Boolean. If set to `0`, `matrix` becomes upper bi-diagonal. If set to `1`, `matrix` becomes symmetric tri-diagonal.
[out]	matrix	A 2D matrix (written in place) of the shape (n,n). This is the output of this function. This matrix is assumed to be initialized to zero before calling this function.

Definition at line 1021 of file c_matrix_operations.cpp.

 {
     for (IndexType j=0; j < non_zero_size; ++j)
     {
         // Diagonals
         matrix[j][j] = diagonals[j];
  
         // Off diagonals
         if (j < non_zero_size-1)
         {
             // Sup-diagonal
             matrix[j][j+1] = supdiagonals[j];
  
             // Sub-diagonal, making symmetric tri-diagonal matrix
             if (tridiagonal)
             {
                 matrix[j+1][j] = supdiagonals[j];
             }
         }
     }
 }

◆ csc_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::csc_matvec	(	const DataType *	A_data,
		const LongIndexType *	A_row_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters

[in]	A_data	CSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_row_indices	CSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector with same size of the number of columns of `A`.
[in]	num_rows	Number of rows of the matrix `A`.
[in]	num_columns	Number of columns of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[out]	c	Output column vector whose size is the number of rows of `A` (`num_rows`). This array is written in-place.

Definition at line 735 of file c_matrix_operations.cpp.

 {
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
  
     // Initialize output to zero
     for (row=0; row < num_rows; ++row)
     {
         c[row] = 0.0;
     }
  
     for (column=0; column < num_columns; ++column)
     {
         for (index_pointer=A_index_pointer[column];
              index_pointer < A_index_pointer[column+1];
              ++index_pointer)
         {
             row = A_row_indices[index_pointer];
             c[row] += A_data[index_pointer] * b[column];
         }
     }
 }

Referenced by cCSCMatrix< DataType >::dot().

Here is the caller graph for this function:

◆ csc_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::csc_matvec_plus	(	const DataType *	A_data,
		const LongIndexType *	A_row_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_columns,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters

[in]	A_data	CSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_row_indices	CSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector with same size of the number of columns of `A`.
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_columns	Number of columns of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[in,out]	c	Output column vector whose size is the number of rows of `A`. This array is updated in-place.

Definition at line 801 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
  
     for (column=0; column < num_columns; ++column)
     {
         for (index_pointer=A_index_pointer[column];
              index_pointer < A_index_pointer[column+1];
              ++index_pointer)
         {
             row = A_row_indices[index_pointer];
             c[row] += alpha * A_data[index_pointer] * b[column];
         }
     }
 }

Referenced by cCSCMatrix< DataType >::dot_plus().

Here is the caller graph for this function:

◆ csc_transposed_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::csc_transposed_matvec	(	const DataType *	A_data,
		const LongIndexType *	A_row_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const LongIndexType	num_columns,
		DataType *	c
	)

static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a type with larger bits for the reduction variable is only sensible if the compiler is optimized with -O2 or -O3 flags.

Parameters

[in]	A_data	CSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_row_indices	CSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector whose size is the number of rows of `A`.
[in]	num_columns	Number of columns of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[out]	c	Output column vector whose size is the number of columns of `A` (`num_columns`). This array is written in-place.

Definition at line 872 of file c_matrix_operations.cpp.

 {
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
     long double sum;
  
     for (column=0; column < num_columns; ++column)
     {
         sum = 0.0;
         for (index_pointer=A_index_pointer[column];
              index_pointer < A_index_pointer[column+1];
              ++index_pointer)
         {
             row = A_row_indices[index_pointer];
             sum += A_data[index_pointer] * b[row];
         }
         c[column] = static_cast<DataType>(sum);
     }
 }

Referenced by cCSCMatrix< DataType >::transpose_dot().

Here is the caller graph for this function:

◆ csc_transposed_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::csc_transposed_matvec_plus	(	const DataType *	A_data,
		const LongIndexType *	A_row_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_columns,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse column (CSC) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a type with larger bits for the reduction variable is only sensible if the compiler is optimized with -O2 or -O3 flags.

Parameters

[in]	A_data	CSC format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_row_indices	CSC format row indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSC format index pointer. The length of this array is one plus the number of columns of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector whose size is the number of rows of `A`.
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_columns	Number of columns of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[in,out]	c	Output column vector whose size is the number of columns of `A` (`num_columns`). This array is updated in-place.

Definition at line 943 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
     long double sum;
  
     for (column=0; column < num_columns; ++column)
     {
         sum = 0.0;
         for (index_pointer=A_index_pointer[column];
              index_pointer < A_index_pointer[column+1];
              ++index_pointer)
         {
             row = A_row_indices[index_pointer];
             sum += A_data[index_pointer] * b[row];
         }
         c[column] += static_cast<DataType>(alpha * sum);
     }
 }

Referenced by cCSCMatrix< DataType >::transpose_dot_plus().

Here is the caller graph for this function:

◆ csr_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::csr_matvec	(	const DataType *	A_data,
		const LongIndexType *	A_column_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const LongIndexType	num_rows,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a type with larger bits for the reduction variable is only sensible if the compiler is optimized with -O2 or -O3 flags.

Parameters

[in]	A_data	CSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_column_indices	CSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector with same size of the number of columns of `A`.
[in]	num_rows	Number of rows of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[out]	c	Output column vector whose size is the number of rows of `A` (`num_rows`). This array is written in-place.

Definition at line 469 of file c_matrix_operations.cpp.

 {
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
     long double sum;
  
     for (row=0; row < num_rows; ++row)
     {
         sum = 0.0;
         for (index_pointer=A_index_pointer[row];
              index_pointer < A_index_pointer[row+1];
              ++index_pointer)
         {
             column = A_column_indices[index_pointer];
             sum += A_data[index_pointer] * b[column];
         }
         c[row] = static_cast<DataType>(sum);
     }
 }

Referenced by cCSRMatrix< DataType >::dot().

Here is the caller graph for this function:

◆ csr_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::csr_matvec_plus	(	const DataType *	A_data,
		const LongIndexType *	A_column_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_rows,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a type with larger bits for the reduction variable is only sensible if the compiler is optimized with -O2 or -O3 flags.

Parameters

[in]	A_data	CSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_column_indices	CSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector with same size of the number of columns of `A`.
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_rows	Number of rows of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[in,out]	c	Output column vector whose size is the number of rows of `A` (`num_rows`). This array is updated in-place.

Definition at line 540 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
     long double sum;
  
     for (row=0; row < num_rows; ++row)
     {
         sum = 0.0;
         for (index_pointer=A_index_pointer[row];
              index_pointer < A_index_pointer[row+1];
              ++index_pointer)
         {
             column = A_column_indices[index_pointer];
             sum += A_data[index_pointer] * b[column];
         }
         c[row] += alpha * static_cast<DataType>(sum);
     }
 }

Referenced by cCSRMatrix< DataType >::dot_plus().

Here is the caller graph for this function:

◆ csr_transposed_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::csr_transposed_matvec	(	const DataType *	A_data,
		const LongIndexType *	A_column_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		DataType *	c
	)

static

Computes \(\boldsymbol{c} =\mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters

[in]	A_data	CSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_column_indices	CSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector whose size is the number of rows of `A`.
[in]	num_rows	Number of rows of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[in]	num_columns	Number of columns of the matrix `A`.
[out]	c	Output column vector whose size is the number of columns of `A` (`num_columns`). This array is written in-place.

Definition at line 606 of file c_matrix_operations.cpp.

 {
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
  
     // Initialize output to zero
     for (column=0; column < num_columns; ++column)
     {
         c[column] = 0.0;
     }
  
     for (row=0; row < num_rows; ++row)
     {
         for (index_pointer=A_index_pointer[row];
              index_pointer < A_index_pointer[row+1];
              ++index_pointer)
         {
             column = A_column_indices[index_pointer];
             c[column] += A_data[index_pointer] * b[row];
         }
     }
 }

Referenced by cCSRMatrix< DataType >::transpose_dot().

Here is the caller graph for this function:

◆ csr_transposed_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::csr_transposed_matvec_plus	(	const DataType *	A_data,
		const LongIndexType *	A_column_indices,
		const LongIndexType *	A_index_pointer,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_rows,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b}\) where \( \mathbf{A} \) is compressed sparse row (CSR) matrix and \( \boldsymbol{b} \) is a dense vector. The output \( \boldsymbol{c} \) is a dense vector.

Parameters

[in]	A_data	CSR format data array of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_column_indices	CSR format column indices of the sparse matrix. The length of this array is the nnz of the matrix.
[in]	A_index_pointer	CSR format index pointer. The length of this array is one plus the number of rows of the matrix. Also, the first element of this array is `0`, and the last element is the nnz of the matrix.
[in]	b	Column vector whose size is the number of rows of `A`.
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_rows	Number of rows of the matrix `A`. This is essentially the size of `A_index_pointer` array minus one.
[in,out]	c	Output column vector whose size is the number of columns of `A`. This array is updated in-place.

Definition at line 672 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType index_pointer;
     LongIndexType row;
     LongIndexType column;
  
     for (row=0; row < num_rows; ++row)
     {
         for (index_pointer=A_index_pointer[row];
              index_pointer < A_index_pointer[row+1];
              ++index_pointer)
         {
             column = A_column_indices[index_pointer];
             c[column] += alpha * A_data[index_pointer] * b[row];
         }
     }
 }

Referenced by cCSRMatrix< DataType >::transpose_dot_plus().

Here is the caller graph for this function:

◆ dense_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::dense_matvec	(	const DataType *	A,
		const DataType *	b,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		const FlagType	A_is_row_major,
		DataType *	c
	)

static

Computes the matrix vector multiplication \( \boldsymbol{c} = \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a type with larger bits for the reduction variable is only sensible if the compiler is optimized with -O2 or -O3 flags.

Parameters

[in]	A	1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by `A_is_row_major` flag.
[in]	b	Column vector
[in]	num_rows	Number of rows of `A`
[in]	num_columns	Number of columns of `A`
[in]	A_is_row_major	Boolean, can be `0` or `1` as follows: If `A` is row major (C ordering where the last index is contiguous) this value should be `1`. If `A` is column major (Fortran ordering where the first index is contiguous), this value should be set to `0`.
[out]	c	The output column vector (written in-place).

Definition at line 61 of file c_matrix_operations.cpp.

 {
     #if (USE_CBLAS == 1)
  
     // Using OpenBlas
     CBLAS_LAYOUT layout;
     if (A_is_row_major)
     {
         layout = CblasRowMajor;
     }
     else
     {
         layout = CblasColMajor;
     }
  
     CBLAS_TRANSPOSE transpose = CblasNoTrans;
     int lda = num_rows;
     int incb = 1;
     int incc = 1;
     DataType alpha = 1.0;
     DataType beta = 0.0;
  
     cblas_interface::xgemv(layout, transpose, num_rows, num_columns, alpha, A,
                            lda, b, incb, beta, c, incc);
  
     #else
  
     // Not using OpenBlas
     LongIndexType j;
     long double sum;
     LongIndexType chunk = 5;
     LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
  
     // Determine major order of A
     if (A_is_row_major)
     {
         // For row major (C ordering) matrix A
         for (LongIndexType i=0; i < num_rows; ++i)
         {
             sum = 0.0;
             for (j=0; j < num_columns_chunked; j+= chunk)
             {
                 sum += A[i*num_columns + j] * b[j] +
                        A[i*num_columns + j+1] * b[j+1] +
                        A[i*num_columns + j+2] * b[j+2] +
                        A[i*num_columns + j+3] * b[j+3] +
                        A[i*num_columns + j+4] * b[j+4];
             }
  
             for (j= num_columns_chunked; j < num_columns; ++j)
             {
                 sum += A[i*num_columns + j] * b[j];
             }
  
             c[i] = static_cast<DataType>(sum);
         }
     }
     else
     {
         // For column major (Fortran ordering) matrix A
         for (LongIndexType i=0; i < num_rows; ++i)
         {
             sum = 0.0;
             for (j=0; j < num_columns; ++j)
             {
                 sum += A[i + num_rows*j] * b[j];
             }
             c[i] = static_cast<DataType>(sum);
         }
     }
  
     #endif
 }

Referenced by cDenseMatrix< DataType >::dot().

Here is the caller graph for this function:

◆ dense_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::dense_matvec_plus	(	const DataType *	A,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		const FlagType	A_is_row_major,
		DataType *	c
	)

static

Computes the operation \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A} \boldsymbol{b} \) where \( \mathbf{A} \) is a dense matrix.

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a wider type for the reduction variable is only noticeable if the code is compiled with the -O2 or -O3 optimization flags.

Parameters

[in]	A	1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the `A_is_row_major` flag.
[in]	b	Column vector
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_rows	Number of rows of `A`
[in]	num_columns	Number of columns of `A`
[in]	A_is_row_major	Boolean, can be `0` or `1` as follows: If `A` is row major (C ordering where the last index is contiguous) this value should be `1`. If `A` is column major (Fortran ordering where the first index is contiguous), this value should be set to `0`.
[in,out]	c	The output column vector (written in-place).

Definition at line 181 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType j;
     long double sum;
     LongIndexType chunk = 5;
     LongIndexType num_columns_chunked = num_columns - (num_columns % chunk);
  
     // Determine major order of A
     if (A_is_row_major)
     {
         // For row major (C ordering) matrix A
         for (LongIndexType i=0; i < num_rows; ++i)
         {
             sum = 0.0;
             for (j=0; j < num_columns_chunked; j+= chunk)
             {
                 sum += A[i*num_columns + j] * b[j] +
                        A[i*num_columns + j+1] * b[j+1] +
                        A[i*num_columns + j+2] * b[j+2] +
                        A[i*num_columns + j+3] * b[j+3] +
                        A[i*num_columns + j+4] * b[j+4];
             }
  
             for (j= num_columns_chunked; j < num_columns; ++j)
             {
                 sum += A[i*num_columns + j] * b[j];
             }
  
             c[i] += alpha * static_cast<DataType>(sum);
         }
     }
     else
     {
         // For column major (Fortran ordering) matrix A
         for (LongIndexType i=0; i < num_rows; ++i)
         {
             sum = 0.0;
             for (j=0; j < num_columns; ++j)
             {
                 sum += A[i + num_rows*j] * b[j];
             }
             c[i] += alpha* static_cast<DataType>(sum);
         }
     }
 }

Referenced by cDenseMatrix< DataType >::dot_plus().

Here is the caller graph for this function:

◆ dense_transposed_matvec()

template<typename DataType >

void cMatrixOperations< DataType >::dense_transposed_matvec	(	const DataType *	A,
		const DataType *	b,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		const FlagType	A_is_row_major,
		DataType *	c
	)

static

Computes matrix vector multiplication \(\boldsymbol{c} = \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a wider type for the reduction variable is only noticeable if the code is compiled with the -O2 or -O3 optimization flags.

Parameters

[in]	A	1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the `A_is_row_major` flag.
[in]	b	Column vector
[in]	num_rows	Number of rows of `A`
[in]	num_columns	Number of columns of `A`
[in]	A_is_row_major	Boolean, can be `0` or `1` as follows: If `A` is row major (C ordering where the last index is contiguous) this value should be `1`. If `A` is column major (Fortran ordering where the first index is contiguous), this value should be set to `0`.
[out]	c	The output column vector (written in-place).

Definition at line 278 of file c_matrix_operations.cpp.

 {
     LongIndexType i;
     long double sum;
     LongIndexType chunk = 5;
     LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
  
     // Determine major order of A
     if (A_is_row_major)
     {
         // For row major (C ordering) matrix A
         for (LongIndexType j=0; j < num_columns; ++j)
         {
             sum = 0.0;
             for (i=0; i < num_rows; ++i)
             {
                 sum += A[i*num_columns + j] * b[i];
             }
             c[j] = static_cast<DataType>(sum);
         }
     }
     else
     {
         // For column major (Fortran ordering) matrix A
         for (LongIndexType j=0; j < num_columns; ++j)
         {
             sum = 0.0;
             for (i=0; i < num_rows_chunked; i += chunk)
             {
                 sum += A[i + num_rows*j] * b[i] +
                        A[i+1 + num_rows*j] * b[i+1] +
                        A[i+2 + num_rows*j] * b[i+2] +
                        A[i+3 + num_rows*j] * b[i+3] +
                        A[i+4 + num_rows*j] * b[i+4];
             }
  
             for (i=num_rows_chunked; i < num_rows; ++i)
             {
                 sum += A[i + num_rows*j] * b[i];
             }
  
             c[j] = static_cast<DataType>(sum);
         }
     }
 }

Referenced by cDenseMatrix< DataType >::transpose_dot().

Here is the caller graph for this function:

◆ dense_transposed_matvec_plus()

template<typename DataType >

void cMatrixOperations< DataType >::dense_transposed_matvec_plus	(	const DataType *	A,
		const DataType *	b,
		const DataType	alpha,
		const LongIndexType	num_rows,
		const LongIndexType	num_columns,
		const FlagType	A_is_row_major,
		DataType *	c
	)

static

Computes \( \boldsymbol{c} = \boldsymbol{c} + \alpha \mathbf{A}^{\intercal} \boldsymbol{b} \) where \( \mathbf{A} \) is dense, and \( \mathbf{A}^{\intercal} \) is the transpose of the matrix \( \mathbf{A} \).

The reduction variable (here, sum ) is of the type long double. This is because when DataType is float, the summation loses precision, especially when the vector size is large. It seems that using long double is slightly faster than using double. The advantage of using a wider type for the reduction variable is only noticeable if the code is compiled with the -O2 or -O3 optimization flags.

Parameters

[in]	A	1D array that represents a 2D dense array with either C (row) major ordering or Fortran (column) major ordering. The major ordering should be defined by the `A_is_row_major` flag.
[in]	b	Column vector
[in]	alpha	A scalar that scales the matrix vector multiplication.
[in]	num_rows	Number of rows of `A`
[in]	num_columns	Number of columns of `A`
[in]	A_is_row_major	Boolean, can be `0` or `1` as follows: If `A` is row major (C ordering where the last index is contiguous) this value should be `1`. If `A` is column major (Fortran ordering where the first index is contiguous), this value should be set to `0`.
[in,out]	c	The output column vector (written in-place).

Definition at line 371 of file c_matrix_operations.cpp.

 {
     if (alpha == 0.0)
     {
         return;
     }
  
     LongIndexType i;
     long double sum;
     LongIndexType chunk = 5;
     LongIndexType num_rows_chunked = num_rows - (num_rows % chunk);
  
     // Determine major order of A
     if (A_is_row_major)
     {
         // For row major (C ordering) matrix A
         for (LongIndexType j=0; j < num_columns; ++j)
         {
             sum = 0.0;
             for (i=0; i < num_rows; ++i)
             {
                 sum += A[i*num_columns + j] * b[i];
             }
             c[j] += alpha * static_cast<DataType>(sum);
         }
     }
     else
     {
         // For column major (Fortran ordering) matrix A
         for (LongIndexType j=0; j < num_columns; ++j)
         {
             sum = 0.0;
             for (i=0; i < num_rows_chunked; i += chunk)
             {
                 sum += A[i + num_rows*j] * b[i] +
                        A[i+1 + num_rows*j] * b[i+1] +
                        A[i+2 + num_rows*j] * b[i+2] +
                        A[i+3 + num_rows*j] * b[i+3] +
                        A[i+4 + num_rows*j] * b[i+4];
             }
  
             for (i=num_rows_chunked; i < num_rows; ++i)
             {
                 sum += A[i + num_rows*j] * b[i];
             }
  
             c[j] += alpha * static_cast<DataType>(sum);
         }
     }
 }

Referenced by cDenseMatrix< DataType >::transpose_dot_plus().

Here is the caller graph for this function:

The documentation for this class was generated from the following files:

/home/runner/work/imate/imate/imate/_c_basic_algebra/c_matrix_operations.h
/home/runner/work/imate/imate/imate/_c_basic_algebra/c_matrix_operations.cpp

Static Public Member Functions

Detailed Description

template<typename DataType> class cMatrixOperations< DataType >

Member Function Documentation

◆ create_band_matrix()

◆ csc_matvec()

◆ csc_matvec_plus()

◆ csc_transposed_matvec()

◆ csc_transposed_matvec_plus()

◆ csr_matvec()

◆ csr_matvec_plus()

◆ csr_transposed_matvec()

◆ csr_transposed_matvec_plus()

◆ dense_matvec()

◆ dense_matvec_plus()

◆ dense_transposed_matvec()

◆ dense_transposed_matvec_plus()

template<typename DataType>
class cMatrixOperations< DataType >