#include <cu_csc_matrix.h>

Inheritance diagram for cuCSCMatrix< DataType >:

[legend]

Collaboration diagram for cuCSCMatrix< DataType >:

[legend]

Public Member Functions
	cuCSCMatrix ()
	Default constructor. More...

	cuCSCMatrix (const DataType *A_data_, const LongIndexType *A_indices_, const LongIndexType *A_index_pointer_, const LongIndexType num_rows_, const LongIndexType num_columns_, const int num_gpu_devices_)
	Constructor with arguments. More...

virtual	~cuCSCMatrix ()
	Virtual destructor. More...

virtual void	dot (const DataType *device_vector, DataType *device_product)

virtual void	dot_plus (const DataType *device_vector, const DataType alpha, DataType *device_product)

virtual void	transpose_dot (const DataType *device_vector, DataType *device_product)

virtual void	transpose_dot_plus (const DataType *device_vector, const DataType alpha, DataType *device_product)

Public Member Functions inherited from cuMatrix< DataType >
	cuMatrix ()

	cuMatrix (int num_gpu_devices_)

virtual	~cuMatrix ()

Public Member Functions inherited from cuLinearOperator< DataType >
	cuLinearOperator ()

	cuLinearOperator (int num_gpu_devices_)
	Constructor with setting `num_gpu_devices`. More...

virtual	~cuLinearOperator ()

cublasHandle_t	get_cublas_handle () const
	This function returns a reference to the `cublasHandle_t` object. The object will be created, if it is not created already. More...

Public Member Functions inherited from cLinearOperator< DataType >
	cLinearOperator ()
	Default constructor. More...

	cLinearOperator (const LongIndexType num_rows_, const LongIndexType num_columns_)
	Constructor with setting `num_rows` and `num_columns`. More...

virtual	~cLinearOperator ()

LongIndexType	get_num_rows () const

LongIndexType	get_num_columns () const

void	set_parameters (DataType *parameters_)
	Sets the scalar parameter `this->parameters`. Parameter is initialized to `NULL`. However, before calling `dot` or `transpose_dot` functions, the parameters must be set. More...

IndexType	get_num_parameters () const

FlagType	is_eigenvalue_relation_known () const
	Returns a flag that determines whether a relation between the parameters of the operator and its eigenvalue(s) is known. More...

Public Member Functions inherited from cCSCMatrix< DataType >
	cCSCMatrix ()

	cCSCMatrix (const DataType *A_data_, const LongIndexType *A_indices_, const LongIndexType *A_index_pointer_, const LongIndexType num_rows_, const LongIndexType num_columns_)

virtual	~cCSCMatrix ()

virtual FlagType	is_identity_matrix () const
	Checks whether the matrix is identity. More...

LongIndexType	get_nnz () const
	Returns the number of non-zero elements of the sparse matrix. More...

Public Member Functions inherited from cMatrix< DataType >
	cMatrix ()
	Default constructor. More...

virtual	~cMatrix ()

DataType	get_eigenvalue (const DataType known_parameters, const DataType known_eigenvalue, const DataType inquiry_parameters) const
	This virtual function implements the pure virtual function of the base class. In this class, this function has no use and was only implemented so that this class can be instantiated (a class with an unimplemented pure virtual function cannot be). More...

Protected Member Functions
virtual void	copy_host_to_device ()
	Copies the member data from the host memory to the device memory. More...

void	allocate_buffer (const int device_id, cusparseOperation_t cusparse_operation, const DataType alpha, const DataType beta, cusparseDnVecDescr_t &cusparse_input_vector, cusparseDnVecDescr_t &cusparse_output_vector, cusparseSpMVAlg_t algorithm)
	Allocates an external buffer for matrix-vector multiplication using `cusparseSpMV` function. More...

Protected Member Functions inherited from cuLinearOperator< DataType >
int	query_gpu_devices () const
	Before any numerical computation, this method checks whether any GPU device is available on the machine, and notifies the user if none was found. More...

void	initialize_cublas_handle ()
	Creates a `cublasHandle_t` object, if not created already. More...

void	initialize_cusparse_handle ()
	Creates a `cusparseHandle_t` object, if not created already. More...

Protected Attributes
DataType **	device_A_data

LongIndexType **	device_A_indices

LongIndexType **	device_A_index_pointer

void **	device_buffer

size_t *	device_buffer_num_bytes

cusparseSpMatDescr_t *	cusparse_matrix_A

Protected Attributes inherited from cuLinearOperator< DataType >
int	num_gpu_devices

bool	copied_host_to_device

cublasHandle_t *	cublas_handle

cusparseHandle_t *	cusparse_handle

Protected Attributes inherited from cLinearOperator< DataType >
const LongIndexType	num_rows

const LongIndexType	num_columns

FlagType	eigenvalue_relation_known

DataType *	parameters

IndexType	num_parameters

Protected Attributes inherited from cCSCMatrix< DataType >
const DataType *	A_data

const LongIndexType *	A_indices

const LongIndexType *	A_index_pointer

Detailed Description

template<typename DataType>
class cuCSCMatrix< DataType >

Definition at line 30 of file cu_csc_matrix.h.

Constructor & Destructor Documentation

◆ cuCSCMatrix() [1/2]

template<typename DataType >

cuCSCMatrix< DataType >::cuCSCMatrix

Default constructor.

Definition at line 33 of file cu_csc_matrix.cu.

                                   :
     // Default construction allocates nothing: every pointer member of this
     // class (host-side arrays of per-device pointers, the buffer-size array
     // and the cuSPARSE matrix descriptors) starts out as NULL.
     device_A_data(NULL),
     device_A_indices(NULL),
     device_A_index_pointer(NULL),
     device_buffer(NULL),
     device_buffer_num_bytes(NULL),
     cusparse_matrix_A(NULL)
 {
 }

◆ cuCSCMatrix() [2/2]

template<typename DataType >

cuCSCMatrix< DataType >::cuCSCMatrix	(	const DataType *	A_data_,
		const LongIndexType *	A_indices_,
		const LongIndexType *	A_index_pointer_,
		const LongIndexType	num_rows_,
		const LongIndexType	num_columns_,
		const int	num_gpu_devices_
	)

Constructor with arguments.

Definition at line 52 of file cu_csc_matrix.cu.

                                    :
  
     // Base class constructor
     cLinearOperator<DataType>(num_rows_, num_columns_),
     cCSCMatrix<DataType>(A_data_, A_indices_, A_index_pointer_, num_rows_,
                          num_columns_),
     cuMatrix<DataType>(num_gpu_devices_),
  
     // Initializer list
     device_A_data(NULL),
     device_A_indices(NULL),
     device_A_index_pointer(NULL),
     device_buffer(NULL),
     cusparse_matrix_A(NULL)
 {
     this->initialize_cusparse_handle();
     this->copy_host_to_device();
  
     // Initialize device buffer
     this->device_buffer = new void*[this->num_gpu_devices];
     this->device_buffer_num_bytes = new size_t[this->num_gpu_devices];
     for (int device_id=0; device_id < this->num_gpu_devices; ++device_id)
     {
         this->device_buffer[device_id] = NULL;
         this->device_buffer_num_bytes[device_id] = 0;
     }
 }

References cuCSCMatrix< DataType >::copy_host_to_device(), cuCSCMatrix< DataType >::device_buffer, cuCSCMatrix< DataType >::device_buffer_num_bytes, cuLinearOperator< DataType >::initialize_cusparse_handle(), and cuLinearOperator< DataType >::num_gpu_devices.

Here is the call graph for this function:

◆ ~cuCSCMatrix()

template<typename DataType >

cuCSCMatrix< DataType >::~cuCSCMatrix

virtual

Virtual destructor.

Definition at line 95 of file cu_csc_matrix.cu.

 {
     // Member objects exist if the second constructor was called.
     if (this->copied_host_to_device)
     {
         // Deallocate arrays of data on gpu
         for (int device_id=0; device_id < this->num_gpu_devices; ++device_id)
         {
             // Switch to a device
             CudaInterface<DataType>::set_device(device_id);
  
             // Deallocate
             CudaInterface<DataType>::del(this->device_A_data[device_id]);
             CudaInterface<LongIndexType>::del(
                     this->device_A_indices[device_id]);
             CudaInterface<LongIndexType>::del(
                     this->device_A_index_pointer[device_id]);
             CudaInterface<LongIndexType>::del(this->device_buffer[device_id]);
             cusparse_interface::destroy_cusparse_matrix(
                     this->cusparse_matrix_A[device_id]);
         }
     }
  
     // Deallocate arrays of pointers on cpu
     if (this->device_A_data != NULL)
     {
         delete[] this->device_A_data;
         this->device_A_data = NULL;
     }
  
     if (this->device_A_indices != NULL)
     {
         delete[] this->device_A_indices;
         this->device_A_indices = NULL;
     }
  
     if (this->device_A_index_pointer != NULL)
     {
         delete[] this->device_A_index_pointer;
         this->device_A_index_pointer = NULL;
     }
  
     if (this->device_buffer != NULL)
     {
         delete[] this->device_buffer;
         this->device_buffer = NULL;
     }
  
     if (this->device_buffer_num_bytes != NULL)
     {
         delete[] this->device_buffer_num_bytes;
         this->device_buffer_num_bytes = NULL;
     }
  
     if (this->cusparse_matrix_A != NULL)
     {
         delete[] this->cusparse_matrix_A;
         this->cusparse_matrix_A = NULL;
     }
 }

References CudaInterface< ArrayType >::del(), cusparse_interface::destroy_cusparse_matrix(), and CudaInterface< ArrayType >::set_device().

Here is the call graph for this function:

Member Function Documentation

◆ allocate_buffer()

template<typename DataType >

void cuCSCMatrix< DataType >::allocate_buffer	(	const int	device_id,
		cusparseOperation_t	cusparse_operation,
		const DataType	alpha,
		const DataType	beta,
		cusparseDnVecDescr_t &	cusparse_input_vector,
		cusparseDnVecDescr_t &	cusparse_output_vector,
		cusparseSpMVAlg_t	algorithm
	)

protected

Allocates an external buffer for matrix-vector multiplication using cusparseSpMV function.

If the current buffer size is not the same as the required buffer size, the buffer is allocated (or reallocated). The allocation is always performed on the first call of this function, since the buffer size is initialized to zero in the constructor. On subsequent calls, the buffer is not reallocated if the required size is unchanged.

Definition at line 250 of file cu_csc_matrix.cu.

 {
     // Find the buffer size needed for matrix-vector multiplication
     size_t required_buffer_size;
     cusparse_interface::cusparse_matrix_buffer_size(
             this->cusparse_handle[device_id], cusparse_operation, alpha,
             this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
             cusparse_output_vector, algorithm, &required_buffer_size);
  
     if (this->device_buffer_num_bytes[device_id] != required_buffer_size)
     {
         // Update the buffer size
         this->device_buffer_num_bytes[device_id] = required_buffer_size;
  
         // Delete buffer if it was allocated previously
         CudaInterface<DataType>::del(this->device_buffer[device_id]);
  
         // Allocate (or reallocate) buffer on device.
         CudaInterface<DataType>::alloc_bytes(
                 this->device_buffer[device_id],
                 this->device_buffer_num_bytes[device_id]);
     }
 }

References CudaInterface< ArrayType >::alloc_bytes(), cusparse_interface::cusparse_matrix_buffer_size(), and CudaInterface< ArrayType >::del().

Here is the call graph for this function:

◆ copy_host_to_device()

template<typename DataType >

void cuCSCMatrix< DataType >::copy_host_to_device

protectedvirtual

Copies the member data from the host memory to the device memory.

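Note: Although the input matrix is a CSC matrix, we treat it as a CSR matrix, since cusparse's interface is only for CSR matrices. To do so, the number of rows and the number of columns of the input matrix are swapped when creating the cusparse matrix (i.e., the cusparse matrix is the CSR representation of the transpose of the input matrix).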

Implements cuMatrix< DataType >.

Definition at line 169 of file cu_csc_matrix.cu.

 {
     if (!this->copied_host_to_device)
     {
         // Set the number of threads
         omp_set_num_threads(this->num_gpu_devices);
  
         // Array sizes
         LongIndexType A_data_size = this->get_nnz();
         LongIndexType A_indices_size = A_data_size;
         LongIndexType A_index_pointer_size = this->num_rows + 1;
         LongIndexType A_nnz = this->get_nnz();
  
         // Swapping the number of rows and columns to treat the input CSC
         // matrix as a CSR matrix.
         LongIndexType csc_num_rows = this->num_columns;
         LongIndexType csc_num_columns = this->num_rows;
  
         // Create array of pointers for data on each gpu device
         this->device_A_data = new DataType*[this->num_gpu_devices];
         this->device_A_indices = new LongIndexType*[this->num_gpu_devices];
         this->device_A_index_pointer = \
             new LongIndexType*[this->num_gpu_devices];
         this->cusparse_matrix_A = \
             new cusparseSpMatDescr_t[this->num_gpu_devices];
  
         #pragma omp parallel
         {
             // Switch to a device with the same device id as the cpu thread id
             unsigned int thread_id = omp_get_thread_num();
             CudaInterface<DataType>::set_device(thread_id);
  
             // A_data
             CudaInterface<DataType>::alloc(this->device_A_data[thread_id],
                                            A_data_size);
             CudaInterface<DataType>::copy_to_device(
                     this->A_data, A_data_size, this->device_A_data[thread_id]);
  
             // A_indices
             CudaInterface<LongIndexType>::alloc(
                     this->device_A_indices[thread_id], A_indices_size);
             CudaInterface<LongIndexType>::copy_to_device(
                     this->A_indices, A_indices_size,
                     this->device_A_indices[thread_id]);
  
             // A_index_pointer
             CudaInterface<LongIndexType>::alloc(
                     this->device_A_index_pointer[thread_id],
                     A_index_pointer_size);
             CudaInterface<LongIndexType>::copy_to_device(
                     this->A_index_pointer, A_index_pointer_size,
                     this->device_A_index_pointer[thread_id]);
  
             // Create cusparse matrix
             cusparse_interface::create_cusparse_matrix(
                     this->cusparse_matrix_A[thread_id], csc_num_rows,
                     csc_num_columns, A_nnz, this->device_A_data[thread_id],
                     this->device_A_indices[thread_id],
                     this->device_A_index_pointer[thread_id]);
         }
  
         // Flag to prevent reinitialization
         this->copied_host_to_device = true;
     }
 }

References CudaInterface< ArrayType >::alloc(), CudaInterface< ArrayType >::copy_to_device(), cusparse_interface::create_cusparse_matrix(), and CudaInterface< ArrayType >::set_device().

Referenced by cuCSCMatrix< DataType >::cuCSCMatrix().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ dot()

template<typename DataType >

void cuCSCMatrix< DataType >::dot	(	const DataType *	device_vector,
		DataType *	device_product
	)

virtual

Reimplemented from cCSCMatrix< DataType >.

Definition at line 287 of file cu_csc_matrix.cu.

 {
     assert(this->copied_host_to_device);
  
     // Create cusparse vector for the input vector
     cusparseDnVecDescr_t cusparse_input_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_input_vector, this->num_columns,
             const_cast<DataType*>(device_vector));
  
     // Create cusparse vector for the output vector
     cusparseDnVecDescr_t cusparse_output_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_output_vector, this->num_rows, device_product);
  
     // Matrix vector settings
     DataType alpha = 1.0;
     DataType beta = 0.0;
  
     // Using transpose operation since we treat CSC matrix as CSR
     cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
     cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
  
     // Get device id
     int device_id = CudaInterface<DataType>::get_device();
  
     // Allocate device buffer (or reallocation if needed)
     this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
                           cusparse_input_vector, cusparse_output_vector,
                           algorithm);
  
     // Matrix vector multiplication
     cusparse_interface::cusparse_matvec(
             this->cusparse_handle[device_id], cusparse_operation, alpha,
             this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
             cusparse_output_vector, algorithm, this->device_buffer[device_id]);
  
     // Destroy cusparse vectors
     cusparse_interface::destroy_cusparse_vector(cusparse_input_vector);
     cusparse_interface::destroy_cusparse_vector(cusparse_output_vector);
 }

References cusparse_interface::create_cusparse_vector(), cusparse_interface::cusparse_matvec(), CUSPARSE_SPMV_ALG_DEFAULT, cusparse_interface::destroy_cusparse_vector(), and CudaInterface< ArrayType >::get_device().

Here is the call graph for this function:

◆ dot_plus()

template<typename DataType >

void cuCSCMatrix< DataType >::dot_plus	(	const DataType *	device_vector,
		const DataType	alpha,
		DataType *	device_product
	)

virtual

Reimplemented from cCSCMatrix< DataType >.

Definition at line 337 of file cu_csc_matrix.cu.

 {
     assert(this->copied_host_to_device);
  
     // Create cusparse vector for the input vector
     cusparseDnVecDescr_t cusparse_input_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_input_vector, this->num_columns,
             const_cast<DataType*>(device_vector));
  
     // Create cusparse vector for the output vector
     cusparseDnVecDescr_t cusparse_output_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_output_vector, this->num_rows, device_product);
  
     // Matrix vector settings
     DataType beta = 1.0;
  
     // Using transpose operation since we treat CSC matrix as CSR
     cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
     cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
  
     // Get device id
     int device_id = CudaInterface<DataType>::get_device();
  
     // Allocate device buffer (or reallocation if needed)
     this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
                           cusparse_input_vector, cusparse_output_vector,
                           algorithm);
  
     // Matrix vector multiplication
     cusparse_interface::cusparse_matvec(
             this->cusparse_handle[device_id], cusparse_operation, alpha,
             this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
             cusparse_output_vector, algorithm, this->device_buffer[device_id]);
  
     // Destroy cusparse vectors
     cusparse_interface::destroy_cusparse_vector(cusparse_input_vector);
     cusparse_interface::destroy_cusparse_vector(cusparse_output_vector);
 }

References cusparse_interface::create_cusparse_vector(), cusparse_interface::cusparse_matvec(), CUSPARSE_SPMV_ALG_DEFAULT, cusparse_interface::destroy_cusparse_vector(), and CudaInterface< ArrayType >::get_device().

Here is the call graph for this function:

◆ transpose_dot()

template<typename DataType >

void cuCSCMatrix< DataType >::transpose_dot	(	const DataType *	device_vector,
		DataType *	device_product
	)

virtual

Reimplemented from cCSCMatrix< DataType >.

Definition at line 387 of file cu_csc_matrix.cu.

 {
     assert(this->copied_host_to_device);
  
     // Create cusparse vector for the input vector
     cusparseDnVecDescr_t cusparse_input_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_input_vector, this->num_columns,
             const_cast<DataType*>(device_vector));
  
     // Create cusparse vector for the output vector
     cusparseDnVecDescr_t cusparse_output_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_output_vector, this->num_rows, device_product);
  
     // Matrix vector settings
     DataType alpha = 1.0;
     DataType beta = 0.0;
  
     // Using non-transpose operation since we treat CSC matrix as CSR
     cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
     cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
  
     // Get device id
     int device_id = CudaInterface<DataType>::get_device();
  
     // Allocate device buffer (or reallocation if needed)
     this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
                           cusparse_input_vector, cusparse_output_vector,
                           algorithm);
  
     // Matrix vector multiplication
     cusparse_interface::cusparse_matvec(
             this->cusparse_handle[device_id], cusparse_operation, alpha,
             this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
             cusparse_output_vector, algorithm, this->device_buffer[device_id]);
  
     // Destroy cusparse vectors
     cusparse_interface::destroy_cusparse_vector(cusparse_input_vector);
     cusparse_interface::destroy_cusparse_vector(cusparse_output_vector);
 }

References cusparse_interface::create_cusparse_vector(), cusparse_interface::cusparse_matvec(), CUSPARSE_SPMV_ALG_DEFAULT, cusparse_interface::destroy_cusparse_vector(), and CudaInterface< ArrayType >::get_device().

Here is the call graph for this function:

◆ transpose_dot_plus()

template<typename DataType >

void cuCSCMatrix< DataType >::transpose_dot_plus	(	const DataType *	device_vector,
		const DataType	alpha,
		DataType *	device_product
	)

virtual

Reimplemented from cCSCMatrix< DataType >.

Definition at line 437 of file cu_csc_matrix.cu.

 {
     assert(this->copied_host_to_device);
  
     // Create cusparse vector for the input vector
     cusparseDnVecDescr_t cusparse_input_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_input_vector, this->num_columns,
             const_cast<DataType*>(device_vector));
  
     // Create cusparse vector for the output vector
     cusparseDnVecDescr_t cusparse_output_vector;
     cusparse_interface::create_cusparse_vector(
             cusparse_output_vector, this->num_rows, device_product);
  
     // Matrix vector settings
     DataType beta = 1.0;
  
     // Using non-transpose operation since we treat CSC matrix as CSR
     cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
     cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
  
     // Get device id
     int device_id = CudaInterface<DataType>::get_device();
  
     // Allocate device buffer (or reallocation if needed)
     this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
                           cusparse_input_vector, cusparse_output_vector,
                           algorithm);
  
     // Matrix vector multiplication
     cusparse_interface::cusparse_matvec(
             this->cusparse_handle[device_id], cusparse_operation, alpha,
             this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
             cusparse_output_vector, algorithm, this->device_buffer[device_id]);
  
     // Destroy cusparse vectors
     cusparse_interface::destroy_cusparse_vector(cusparse_input_vector);
     cusparse_interface::destroy_cusparse_vector(cusparse_output_vector);
 }

References cusparse_interface::create_cusparse_vector(), cusparse_interface::cusparse_matvec(), CUSPARSE_SPMV_ALG_DEFAULT, cusparse_interface::destroy_cusparse_vector(), and CudaInterface< ArrayType >::get_device().

Here is the call graph for this function:

Definition at line 85 of file cu_csc_matrix.h.

Referenced by cuCSCMatrix< DataType >::cuCSCMatrix().

◆ device_buffer_num_bytes

template<typename DataType >

size_t* cuCSCMatrix< DataType >::device_buffer_num_bytes

protected

Definition at line 86 of file cu_csc_matrix.h.

Referenced by cuCSCMatrix< DataType >::cuCSCMatrix().

The documentation for this class was generated from the following files:

/home/runner/work/imate/imate/imate/_cu_linear_operator/cu_csc_matrix.h
/home/runner/work/imate/imate/imate/_cu_linear_operator/cu_csc_matrix.cu

Public Member Functions

Protected Member Functions

Protected Attributes

Detailed Description

template<typename DataType> class cuCSCMatrix< DataType >

Constructor & Destructor Documentation

◆ cuCSCMatrix() [1/2]

◆ cuCSCMatrix() [2/2]

◆ ~cuCSCMatrix()

Member Function Documentation

◆ allocate_buffer()

◆ copy_host_to_device()

◆ dot()

◆ dot_plus()

◆ transpose_dot()

◆ transpose_dot_plus()

Member Data Documentation

◆ cusparse_matrix_A

◆ device_A_data

◆ device_A_index_pointer

◆ device_A_indices

◆ device_buffer

◆ device_buffer_num_bytes

template<typename DataType>
class cuCSCMatrix< DataType >