20 #include "../_cu_basic_algebra/cu_matrix_operations.h"
21 #include "../_cu_basic_algebra/cusparse_interface.h"
22 #include "../_cuda_utilities/cuda_interface.h"
32 template <
typename DataType>
35 device_A_indices(NULL),
36 device_A_index_pointer(NULL),
38 device_buffer_num_bytes(NULL),
39 cusparse_matrix_A(NULL)
51 template <
typename DataType>
53 const DataType* A_data_,
58 const int num_gpu_devices_):
62 cCSRMatrix<DataType>(A_data_, A_indices_, A_index_pointer_, num_rows_,
64 cuMatrix<DataType>(num_gpu_devices_),
68 device_A_indices(NULL),
69 device_A_index_pointer(NULL),
71 cusparse_matrix_A(NULL)
94 template <
typename DataType>
98 if (this->copied_host_to_device)
101 for (
int device_id=0; device_id < this->num_gpu_devices; ++device_id)
109 this->device_A_indices[device_id]);
111 this->device_A_index_pointer[device_id]);
114 this->cusparse_matrix_A[device_id]);
119 if (this->device_A_data != NULL)
121 delete[] this->device_A_data;
122 this->device_A_data = NULL;
125 if (this->device_A_indices != NULL)
127 delete[] this->device_A_indices;
128 this->device_A_indices = NULL;
131 if (this->device_A_index_pointer != NULL)
133 delete[] this->device_A_index_pointer;
134 this->device_A_index_pointer = NULL;
137 if (this->device_buffer != NULL)
139 delete[] this->device_buffer;
140 this->device_buffer = NULL;
143 if (this->device_buffer_num_bytes != NULL)
145 delete[] this->device_buffer_num_bytes;
146 this->device_buffer_num_bytes = NULL;
149 if (this->cusparse_matrix_A != NULL)
151 delete[] this->cusparse_matrix_A;
152 this->cusparse_matrix_A = NULL;
164 template <
typename DataType>
167 if (!this->copied_host_to_device)
170 omp_set_num_threads(this->num_gpu_devices);
179 this->device_A_data =
new DataType*[this->num_gpu_devices];
180 this->device_A_indices =
new LongIndexType*[this->num_gpu_devices];
181 this->device_A_index_pointer = \
183 this->cusparse_matrix_A = \
184 new cusparseSpMatDescr_t[this->num_gpu_devices];
189 unsigned int thread_id = omp_get_thread_num();
196 this->A_data, A_data_size, this->device_A_data[thread_id]);
200 this->device_A_indices[thread_id], A_indices_size);
202 this->A_indices, A_indices_size,
203 this->device_A_indices[thread_id]);
207 this->device_A_index_pointer[thread_id],
208 A_index_pointer_size);
210 this->A_index_pointer, A_index_pointer_size,
211 this->device_A_index_pointer[thread_id]);
215 this->cusparse_matrix_A[thread_id], this->num_rows,
216 this->num_columns, A_nnz, this->device_A_data[thread_id],
217 this->device_A_indices[thread_id],
218 this->device_A_index_pointer[thread_id]);
222 this->copied_host_to_device =
true;
240 template <
typename DataType>
243 cusparseOperation_t cusparse_operation,
244 const DataType alpha,
246 cusparseDnVecDescr_t& cusparse_input_vector,
247 cusparseDnVecDescr_t& cusparse_output_vector,
248 cusparseSpMVAlg_t algorithm)
251 size_t required_buffer_size;
253 this->cusparse_handle[device_id], cusparse_operation, alpha,
254 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
255 cusparse_output_vector, algorithm, &required_buffer_size);
257 if (this->device_buffer_num_bytes[device_id] != required_buffer_size)
260 this->device_buffer_num_bytes[device_id] = required_buffer_size;
267 this->device_buffer[device_id],
268 this->device_buffer_num_bytes[device_id]);
277 template <
typename DataType>
279 const DataType* device_vector,
280 DataType* device_product)
282 assert(this->copied_host_to_device);
285 cusparseDnVecDescr_t cusparse_input_vector;
287 cusparse_input_vector, this->num_columns,
288 const_cast<DataType*
>(device_vector));
291 cusparseDnVecDescr_t cusparse_output_vector;
293 cusparse_output_vector, this->num_rows, device_product);
296 DataType alpha = 1.0;
298 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
305 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
306 cusparse_input_vector, cusparse_output_vector,
311 this->cusparse_handle[device_id], cusparse_operation, alpha,
312 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
313 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
325 template <
typename DataType>
327 const DataType* device_vector,
328 const DataType alpha,
329 DataType* device_product)
331 assert(this->copied_host_to_device);
334 cusparseDnVecDescr_t cusparse_input_vector;
336 cusparse_input_vector, this->num_columns,
337 const_cast<DataType*
>(device_vector));
340 cusparseDnVecDescr_t cusparse_output_vector;
342 cusparse_output_vector, this->num_rows, device_product);
346 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
353 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
354 cusparse_input_vector, cusparse_output_vector,
359 this->cusparse_handle[device_id], cusparse_operation, alpha,
360 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
361 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
373 template <
typename DataType>
375 const DataType* device_vector,
376 DataType* device_product)
378 assert(this->copied_host_to_device);
381 cusparseDnVecDescr_t cusparse_input_vector;
383 cusparse_input_vector, this->num_columns,
384 const_cast<DataType*
>(device_vector));
387 cusparseDnVecDescr_t cusparse_output_vector;
389 cusparse_output_vector, this->num_rows, device_product);
392 DataType alpha = 1.0;
394 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
401 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
402 cusparse_input_vector, cusparse_output_vector,
407 this->cusparse_handle[device_id], cusparse_operation, alpha,
408 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
409 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
421 template <
typename DataType>
423 const DataType* device_vector,
424 const DataType alpha,
425 DataType* device_product)
427 assert(this->copied_host_to_device);
430 cusparseDnVecDescr_t cusparse_input_vector;
432 cusparse_input_vector, this->num_columns,
433 const_cast<DataType*
>(device_vector));
436 cusparseDnVecDescr_t cusparse_output_vector;
438 cusparse_output_vector, this->num_rows, device_product);
442 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
449 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
450 cusparse_input_vector, cusparse_output_vector,
455 this->cusparse_handle[device_id], cusparse_operation, alpha,
456 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
457 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
static int get_device()
Gets the current device in multi-gpu applications.
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Base class for linear operators. This class serves as interface for all derived classes.
size_t * device_buffer_num_bytes
cuCSRMatrix()
Default constructor.
virtual ~cuCSRMatrix()
Virtual destructor.
virtual void transpose_dot(const DataType *device_vector, DataType *device_product)
virtual void dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
virtual void dot(const DataType *device_vector, DataType *device_product)
virtual void transpose_dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
void allocate_buffer(const int device_id, cusparseOperation_t cusparse_operation, const DataType alpha, const DataType beta, cusparseDnVecDescr_t &cusparse_input_vector, cusparseDnVecDescr_t &cusparse_output_vector, cusparseSpMVAlg_t algorithm)
Allocates an external buffer for matrix-vector multiplication using cusparseSpMV function.
virtual void copy_host_to_device()
Copies the member data from the host memory to the device memory.
void initialize_cusparse_handle()
Creates a cusparseHandle_t object, if not created already.
Base class for constant matrices.
#define CUSPARSE_SPMV_ALG_DEFAULT
void create_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix, const LongIndexType num_rows, const LongIndexType num_columns, const LongIndexType nnz, DataType *device_A_data, LongIndexType *device_A_indices, LongIndexType *device_A_index_pointer)
void destroy_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix)
Destroys cusparse matrix.
void destroy_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector)
Destroys cusparse vector.
void cusparse_matvec(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
void create_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, DataType *device_vector)
void cusparse_matrix_buffer_size(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)