20 #include "../_cu_basic_algebra/cu_matrix_operations.h"
21 #include "../_cu_basic_algebra/cusparse_interface.h"
22 #include "../_cuda_utilities/cuda_interface.h"
32 template <
typename DataType>
35 device_A_indices(NULL),
36 device_A_index_pointer(NULL),
38 device_buffer_num_bytes(NULL),
39 cusparse_matrix_A(NULL)
51 template <
typename DataType>
53 const DataType* A_data_,
58 const int num_gpu_devices_):
62 cCSCMatrix<DataType>(A_data_, A_indices_, A_index_pointer_, num_rows_,
64 cuMatrix<DataType>(num_gpu_devices_),
68 device_A_indices(NULL),
69 device_A_index_pointer(NULL),
71 cusparse_matrix_A(NULL)
94 template <
typename DataType>
98 if (this->copied_host_to_device)
101 for (
int device_id=0; device_id < this->num_gpu_devices; ++device_id)
109 this->device_A_indices[device_id]);
111 this->device_A_index_pointer[device_id]);
114 this->cusparse_matrix_A[device_id]);
119 if (this->device_A_data != NULL)
121 delete[] this->device_A_data;
122 this->device_A_data = NULL;
125 if (this->device_A_indices != NULL)
127 delete[] this->device_A_indices;
128 this->device_A_indices = NULL;
131 if (this->device_A_index_pointer != NULL)
133 delete[] this->device_A_index_pointer;
134 this->device_A_index_pointer = NULL;
137 if (this->device_buffer != NULL)
139 delete[] this->device_buffer;
140 this->device_buffer = NULL;
143 if (this->device_buffer_num_bytes != NULL)
145 delete[] this->device_buffer_num_bytes;
146 this->device_buffer_num_bytes = NULL;
149 if (this->cusparse_matrix_A != NULL)
151 delete[] this->cusparse_matrix_A;
152 this->cusparse_matrix_A = NULL;
168 template <
typename DataType>
171 if (!this->copied_host_to_device)
174 omp_set_num_threads(this->num_gpu_devices);
188 this->device_A_data =
new DataType*[this->num_gpu_devices];
189 this->device_A_indices =
new LongIndexType*[this->num_gpu_devices];
190 this->device_A_index_pointer = \
192 this->cusparse_matrix_A = \
193 new cusparseSpMatDescr_t[this->num_gpu_devices];
198 unsigned int thread_id = omp_get_thread_num();
205 this->A_data, A_data_size, this->device_A_data[thread_id]);
209 this->device_A_indices[thread_id], A_indices_size);
211 this->A_indices, A_indices_size,
212 this->device_A_indices[thread_id]);
216 this->device_A_index_pointer[thread_id],
217 A_index_pointer_size);
219 this->A_index_pointer, A_index_pointer_size,
220 this->device_A_index_pointer[thread_id]);
224 this->cusparse_matrix_A[thread_id], csc_num_rows,
225 csc_num_columns, A_nnz, this->device_A_data[thread_id],
226 this->device_A_indices[thread_id],
227 this->device_A_index_pointer[thread_id]);
231 this->copied_host_to_device =
true;
249 template <
typename DataType>
252 cusparseOperation_t cusparse_operation,
253 const DataType alpha,
255 cusparseDnVecDescr_t& cusparse_input_vector,
256 cusparseDnVecDescr_t& cusparse_output_vector,
257 cusparseSpMVAlg_t algorithm)
260 size_t required_buffer_size;
262 this->cusparse_handle[device_id], cusparse_operation, alpha,
263 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
264 cusparse_output_vector, algorithm, &required_buffer_size);
266 if (this->device_buffer_num_bytes[device_id] != required_buffer_size)
269 this->device_buffer_num_bytes[device_id] = required_buffer_size;
276 this->device_buffer[device_id],
277 this->device_buffer_num_bytes[device_id]);
286 template <
typename DataType>
288 const DataType* device_vector,
289 DataType* device_product)
291 assert(this->copied_host_to_device);
294 cusparseDnVecDescr_t cusparse_input_vector;
296 cusparse_input_vector, this->num_columns,
297 const_cast<DataType*
>(device_vector));
300 cusparseDnVecDescr_t cusparse_output_vector;
302 cusparse_output_vector, this->num_rows, device_product);
305 DataType alpha = 1.0;
309 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
316 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
317 cusparse_input_vector, cusparse_output_vector,
322 this->cusparse_handle[device_id], cusparse_operation, alpha,
323 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
324 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
336 template <
typename DataType>
338 const DataType* device_vector,
339 const DataType alpha,
340 DataType* device_product)
342 assert(this->copied_host_to_device);
345 cusparseDnVecDescr_t cusparse_input_vector;
347 cusparse_input_vector, this->num_columns,
348 const_cast<DataType*
>(device_vector));
351 cusparseDnVecDescr_t cusparse_output_vector;
353 cusparse_output_vector, this->num_rows, device_product);
359 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
366 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
367 cusparse_input_vector, cusparse_output_vector,
372 this->cusparse_handle[device_id], cusparse_operation, alpha,
373 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
374 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
386 template <
typename DataType>
388 const DataType* device_vector,
389 DataType* device_product)
391 assert(this->copied_host_to_device);
394 cusparseDnVecDescr_t cusparse_input_vector;
396 cusparse_input_vector, this->num_columns,
397 const_cast<DataType*
>(device_vector));
400 cusparseDnVecDescr_t cusparse_output_vector;
402 cusparse_output_vector, this->num_rows, device_product);
405 DataType alpha = 1.0;
409 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
416 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
417 cusparse_input_vector, cusparse_output_vector,
422 this->cusparse_handle[device_id], cusparse_operation, alpha,
423 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
424 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
436 template <
typename DataType>
438 const DataType* device_vector,
439 const DataType alpha,
440 DataType* device_product)
442 assert(this->copied_host_to_device);
445 cusparseDnVecDescr_t cusparse_input_vector;
447 cusparse_input_vector, this->num_columns,
448 const_cast<DataType*
>(device_vector));
451 cusparseDnVecDescr_t cusparse_output_vector;
453 cusparse_output_vector, this->num_rows, device_product);
459 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
466 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
467 cusparse_input_vector, cusparse_output_vector,
472 this->cusparse_handle[device_id], cusparse_operation, alpha,
473 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
474 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
static int get_device()
Gets the current device in multi-gpu applications.
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Base class for linear operators. This class serves as interface for all derived classes.
virtual void transpose_dot(const DataType *device_vector, DataType *device_product)
virtual void transpose_dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
virtual void dot(const DataType *device_vector, DataType *device_product)
size_t * device_buffer_num_bytes
cuCSCMatrix()
Default constructor.
virtual void dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
virtual void copy_host_to_device()
Copies the member data from the host memory to the device memory.
void allocate_buffer(const int device_id, cusparseOperation_t cusparse_operation, const DataType alpha, const DataType beta, cusparseDnVecDescr_t &cusparse_input_vector, cusparseDnVecDescr_t &cusparse_output_vector, cusparseSpMVAlg_t algorithm)
Allocates an external buffer for matrix-vector multiplication using cusparseSpMV function.
virtual ~cuCSCMatrix()
Virtual destructor.
void initialize_cusparse_handle()
Creates a cusparseHandle_t object, if not created already.
Base class for constant matrices.
#define CUSPARSE_SPMV_ALG_DEFAULT
void create_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix, const LongIndexType num_rows, const LongIndexType num_columns, const LongIndexType nnz, DataType *device_A_data, LongIndexType *device_A_indices, LongIndexType *device_A_index_pointer)
void destroy_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix)
Destroys cusparse matrix.
void destroy_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector)
Destroys cusparse vector.
void cusparse_matvec(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
void create_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, DataType *device_vector)
void cusparse_matrix_buffer_size(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)