imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cu_csc_matrix.cu
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11
12// =======
13// Headers
14// =======
15
16#include "./cu_csc_matrix.h"
17#include "../_definitions/definitions.h" // USE_OPENMP
18#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,
19 // __half, __nv_bfloat16
20
21#if defined(USE_OPENMP) && (USE_OPENMP == 1)
22 #include <omp.h> // omp_set_num_threads
23#endif
24
25#include <cuda.h> // CUDA_VERSION
26#include <cstddef> // NULL
27#include <cassert> // assert
28#include "../_cu_basic_algebra/cu_matrix_operations.h" // cuMatrixOperations
29#include "../_cu_basic_algebra/cusparse_api.h" // cusparse_api
30#include "../_cuda_utilities/cuda_api.h" // CudaAPI
31#include "../_cu_arithmetics/cu_arithmetics.h" // cu_arithmetics
32
33
34// =============
35// constructor 1
36// =============
37
40
41template <typename DataType>
43 A_data(NULL),
44 A_indices(NULL),
45 A_index_pointer(NULL),
46 device_A_data(NULL),
47 device_A_indices(NULL),
48 device_A_index_pointer(NULL),
49 device_buffer(NULL),
50 device_buffer_num_bytes(NULL),
51 cusparse_matrix_A(NULL)
52{
53}
54
55
56// =============
57// constructor 2
58// =============
59
82
83template <typename DataType>
85 const DataType* A_data_,
86 const LongIndexType* A_indices_,
87 const LongIndexType* A_index_pointer_,
88 const LongIndexType num_rows_,
89 const LongIndexType num_columns_,
90 const FlagType A_is_symmetric_,
91 const int num_gpu_devices_):
92
93 // Base class constructor
94 cLinearOperatorBase(num_rows_, num_columns_),
95 cuLinearOperator<DataType>(num_gpu_devices_),
96 cuMatrix<DataType>(A_is_symmetric_),
97
98 // Initializer list
99 A_data(A_data_),
100 A_indices(A_indices_),
101 A_index_pointer(A_index_pointer_),
102 device_A_data(NULL),
103 device_A_indices(NULL),
104 device_A_index_pointer(NULL),
105 device_buffer(NULL),
106 cusparse_matrix_A(NULL)
107{
109 this->copy_host_to_device();
110
111 // Initialize device buffer
112 this->device_buffer = new void*[this->num_gpu_devices];
113 this->device_buffer_num_bytes = new size_t[this->num_gpu_devices];
114 for (int device_id=0; device_id < this->num_gpu_devices; ++device_id)
115 {
116 this->device_buffer[device_id] = NULL;
117 this->device_buffer_num_bytes[device_id] = 0;
118 }
119}
120
121
122// ==========
123// destructor
124// ==========
125
128
129template <typename DataType>
131{
132 // Member objects exist if the second constructor was called.
133 if (this->copied_host_to_device)
134 {
135 // Deallocate arrays of data on gpu
136 for (int device_id=0; device_id < this->num_gpu_devices; ++device_id)
137 {
138 // Switch to a device
140
141 // Deallocate
142 CudaAPI<DataType>::del(this->device_A_data[device_id]);
144 this->device_A_indices[device_id]);
146 this->device_A_index_pointer[device_id]);
147 CudaAPI<LongIndexType>::del(this->device_buffer[device_id]);
149 this->cusparse_matrix_A[device_id]);
150 }
151 }
152
153 // Deallocate arrays of pointers on cpu
154 if (this->device_A_data != NULL)
155 {
156 delete[] this->device_A_data;
157 this->device_A_data = NULL;
158 }
159
160 if (this->device_A_indices != NULL)
161 {
162 delete[] this->device_A_indices;
163 this->device_A_indices = NULL;
164 }
165
166 if (this->device_A_index_pointer != NULL)
167 {
168 delete[] this->device_A_index_pointer;
169 this->device_A_index_pointer = NULL;
170 }
171
172 if (this->device_buffer != NULL)
173 {
174 delete[] this->device_buffer;
175 this->device_buffer = NULL;
176 }
177
178 if (this->device_buffer_num_bytes != NULL)
179 {
180 delete[] this->device_buffer_num_bytes;
181 this->device_buffer_num_bytes = NULL;
182 }
183
184 if (this->cusparse_matrix_A != NULL)
185 {
186 delete[] this->cusparse_matrix_A;
187 this->cusparse_matrix_A = NULL;
188 }
189}
190
191
192// ===================
193// copy host to device
194// ===================
195
203
204template <typename DataType>
206{
207 if (!this->copied_host_to_device)
208 {
209 // Set the number of threads
210 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
211 omp_set_num_threads(this->num_gpu_devices);
212 #endif
213
214 // Array sizes
215 LongIndexType A_data_size = this->get_nnz();
216 LongIndexType A_indices_size = A_data_size;
217 LongIndexType A_index_pointer_size = this->num_rows + 1;
218 LongIndexType A_nnz = this->get_nnz();
219
220 // CuSparse API in CUDA below 12 does not support CSC matrix
221 #ifndef CUDA_VERSION
222 #error CUDA_VERSION Undefined!
223 #elif CUDA_VERSION < 12000
224 // Swapping the number of rows and columns to treat the input CSC
225 // matrix as a CSR matrix.
226 LongIndexType csc_num_rows = this->num_columns;
227 LongIndexType csc_num_columns = this->num_rows;
228 #endif
229
230 // Create array of pointers for data on each gpu device
231 this->device_A_data = new DataType*[this->num_gpu_devices];
232 this->device_A_indices = new LongIndexType*[this->num_gpu_devices];
233 this->device_A_index_pointer = \
234 new LongIndexType*[this->num_gpu_devices];
235 this->cusparse_matrix_A = \
236 new cusparseSpMatDescr_t[this->num_gpu_devices];
237
238 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
239 #pragma omp parallel
240 #endif
241 {
242 // Switch to a device with the same device id as the cpu thread id
243 unsigned int thread_id;
244 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
245 thread_id = omp_get_thread_num();
246 #else
247 thread_id = 0;
248 #endif
249
251
252 // A_data
253 CudaAPI<DataType>::alloc(this->device_A_data[thread_id],
254 A_data_size);
256 this->A_data, A_data_size, this->device_A_data[thread_id]);
257
258 // A_indices
260 this->device_A_indices[thread_id], A_indices_size);
262 this->A_indices, A_indices_size,
263 this->device_A_indices[thread_id]);
264
265 // A_index_pointer
267 this->device_A_index_pointer[thread_id],
268 A_index_pointer_size);
270 this->A_index_pointer, A_index_pointer_size,
271 this->device_A_index_pointer[thread_id]);
272
273 // Create cusparse matrix
274 #ifndef CUDA_VERSION
275 #error CUDA_VERSION Undefined!
276 #elif CUDA_VERSION < 12000
277 // Treat CSC as CSR matrix with swapped columns and rows
279 this->cusparse_matrix_A[thread_id], csc_num_rows,
280 csc_num_columns, A_nnz, this->device_A_data[thread_id],
281 this->device_A_indices[thread_id],
282 this->device_A_index_pointer[thread_id]);
283 #else
284 // Use CSC api in CUDA >= 12
286 this->cusparse_matrix_A[thread_id], this->num_rows,
287 this->num_columns, A_nnz,
288 this->device_A_data[thread_id],
289 this->device_A_indices[thread_id],
290 this->device_A_index_pointer[thread_id]);
291 #endif
292 }
293
294 // Flag to prevent reinitialization
295 this->copied_host_to_device = true;
296 }
297}
298
299
300// ===============
301// allocate buffer
302// ===============
303
333
334template <typename DataType>
336 const int device_id,
337 cusparseOperation_t cusparse_operation,
338 const DataType alpha,
339 const DataType beta,
340 cusparseDnVecDescr_t& cusparse_input_vector,
341 cusparseDnVecDescr_t& cusparse_output_vector,
342 cusparseSpMVAlg_t algorithm)
343{
344 // Find the buffer size needed for matrix-vector multiplication
345 size_t required_buffer_size;
347 this->cusparse_handle[device_id], cusparse_operation, alpha,
348 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
349 cusparse_output_vector, algorithm, &required_buffer_size);
350
351 if (this->device_buffer_num_bytes[device_id] != required_buffer_size)
352 {
353 // Update the buffer size
354 this->device_buffer_num_bytes[device_id] = required_buffer_size;
355
356 // Delete buffer if it was allocated previously
357 CudaAPI<DataType>::del(this->device_buffer[device_id]);
358
359 // Allocate (or reallocate) buffer on device.
361 this->device_buffer[device_id],
362 this->device_buffer_num_bytes[device_id]);
363 }
364}
365
366
367// ==================
368// is identity matrix
369// ==================
370
379
380template <typename DataType>
382{
383 FlagType matrix_is_identity = 1;
384 LongIndexType index_pointer;
385 LongIndexType row;
386 DataType matrix_element;
387 const DataType diagonal = 1.0;
388 const DataType off_diagonal = 0.0;
389
390 // Check matrix element-wise
391 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
392 #pragma omp parallel for \
393 schedule(static) \
394 if (!omp_in_parallel()) \
395 default(none) \
396 shared(matrix_is_identity, diagonal, off_diagonal) \
397 private(index_pointer, row, matrix_element)
398 #endif
399 for (LongIndexType column=0; column < this->num_columns; ++column)
400 {
401 if (matrix_is_identity)
402 {
403 for (index_pointer=this->A_index_pointer[column];
404 index_pointer < this->A_index_pointer[column+1];
405 ++index_pointer)
406 {
407 row = this->A_indices[index_pointer];
408
409 if (!((this->A_is_symmetric) && (column >= row)))
410 {
411 matrix_element = this->A_data[index_pointer];
412
413 if (((row == column) && \
414 (!cu_arithmetics::is_equal(matrix_element,
415 diagonal))) || \
416 ((row != column) && \
417 (!cu_arithmetics::is_equal(matrix_element,
418 off_diagonal))))
419 {
420 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
421 #pragma omp atomic write
422 #endif
423 matrix_is_identity = 0;
424
425 break;
426 }
427 }
428 }
429 }
430 }
431
432 return matrix_is_identity;
433}
434
435
436// =======
437// get nnz
438// =======
439
447
448template <typename DataType>
450{
451 return this->A_index_pointer[this->num_columns];
452}
453
454
455// ===
456// dot
457// ===
458
476
477template <typename DataType>
479 const DataType* device_vector,
480 DataType* device_product)
481{
482 assert(this->copied_host_to_device);
483
484 // Create cusparse vector for the input vector
485 cusparseDnVecDescr_t cusparse_input_vector;
487 cusparse_input_vector, this->num_columns,
488 const_cast<DataType*>(device_vector));
489
490 // Create cusparse vector for the output vector
491 cusparseDnVecDescr_t cusparse_output_vector;
493 cusparse_output_vector, this->num_rows, device_product);
494
495 // Matrix vector settings
496 DataType alpha = cu_arithmetics::cast<float, DataType>(1.0f);
497 DataType beta = cu_arithmetics::cast<float, DataType>(0.0f);
498
499 #ifndef CUDA_VERSION
500 #error CUDA_VERSION Undefined!
501 #elif CUDA_VERSION < 12000
502 // Using transpose operation since we treat CSC matrix as CSR
503 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
504 #else
505 cusparseOperation_t cusparse_operation = \
506 CUSPARSE_OPERATION_NON_TRANSPOSE;
507 #endif
508
509 cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
510
511 // Get device id
512 int device_id = CudaAPI<DataType>::get_device();
513
514 // Allocate device buffer (or reallocation if needed)
515 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
516 cusparse_input_vector, cusparse_output_vector,
517 algorithm);
518
519 // Matrix vector multiplication
521 this->cusparse_handle[device_id], cusparse_operation, alpha,
522 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
523 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
524
525 // Destroy cusparse vectors
526 cusparse_api::destroy_cusparse_vector(cusparse_input_vector);
527 cusparse_api::destroy_cusparse_vector(cusparse_output_vector);
528}
529
530
531// ========
532// dot plus
533// ========
534
554
555template <typename DataType>
557 const DataType* device_vector,
558 const DataType alpha,
559 DataType* device_product)
560{
561 assert(this->copied_host_to_device);
562
563 // Create cusparse vector for the input vector
564 cusparseDnVecDescr_t cusparse_input_vector;
566 cusparse_input_vector, this->num_columns,
567 const_cast<DataType*>(device_vector));
568
569 // Create cusparse vector for the output vector
570 cusparseDnVecDescr_t cusparse_output_vector;
572 cusparse_output_vector, this->num_rows, device_product);
573
574 // Matrix vector settings
575 DataType beta = cu_arithmetics::cast<float, DataType>(1.0f);
576
577 #ifndef CUDA_VERSION
578 #error CUDA_VERSION Undefined!
579 #elif CUDA_VERSION < 12000
580 // Using transpose operation since we treat CSC matrix as CSR
581 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
582 #else
583 cusparseOperation_t cusparse_operation = \
584 CUSPARSE_OPERATION_NON_TRANSPOSE;
585 #endif
586
587 cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
588
589 // Get device id
590 int device_id = CudaAPI<DataType>::get_device();
591
592 // Allocate device buffer (or reallocation if needed)
593 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
594 cusparse_input_vector, cusparse_output_vector,
595 algorithm);
596
597 // Matrix vector multiplication
599 this->cusparse_handle[device_id], cusparse_operation, alpha,
600 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
601 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
602
603 // Destroy cusparse vectors
604 cusparse_api::destroy_cusparse_vector(cusparse_input_vector);
605 cusparse_api::destroy_cusparse_vector(cusparse_output_vector);
606}
607
608
609// =============
610// transpose dot
611// =============
612
630
631template <typename DataType>
633 const DataType* device_vector,
634 DataType* device_product)
635{
636 assert(this->copied_host_to_device);
637
638 // Create cusparse vector for the input vector
639 cusparseDnVecDescr_t cusparse_input_vector;
641 cusparse_input_vector, this->num_columns,
642 const_cast<DataType*>(device_vector));
643
644 // Create cusparse vector for the output vector
645 cusparseDnVecDescr_t cusparse_output_vector;
647 cusparse_output_vector, this->num_rows, device_product);
648
649 // Matrix vector settings
650 DataType alpha = cu_arithmetics::cast<float, DataType>(1.0f);
651 DataType beta = cu_arithmetics::cast<float, DataType>(0.0f);
652
653 #ifndef CUDA_VERSION
654 #error CUDA_VERSION Undefined!
655 #elif CUDA_VERSION < 12000
656 // Using non-transpose operation since we treat CSC matrix as CSR
657 cusparseOperation_t cusparse_operation = \
658 CUSPARSE_OPERATION_NON_TRANSPOSE;
659 #else
660 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
661 #endif
662
663 cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
664
665 // Get device id
666 int device_id = CudaAPI<DataType>::get_device();
667
668 // Allocate device buffer (or reallocation if needed)
669 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
670 cusparse_input_vector, cusparse_output_vector,
671 algorithm);
672
673 // Matrix vector multiplication
675 this->cusparse_handle[device_id], cusparse_operation, alpha,
676 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
677 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
678
679 // Destroy cusparse vectors
680 cusparse_api::destroy_cusparse_vector(cusparse_input_vector);
681 cusparse_api::destroy_cusparse_vector(cusparse_output_vector);
682}
683
684
685// ==================
686// transpose dot plus
687// ==================
688
709
710template <typename DataType>
712 const DataType* device_vector,
713 const DataType alpha,
714 DataType* device_product)
715{
716 assert(this->copied_host_to_device);
717
718 // Create cusparse vector for the input vector
719 cusparseDnVecDescr_t cusparse_input_vector;
721 cusparse_input_vector, this->num_columns,
722 const_cast<DataType*>(device_vector));
723
724 // Create cusparse vector for the output vector
725 cusparseDnVecDescr_t cusparse_output_vector;
727 cusparse_output_vector, this->num_rows, device_product);
728
729 // Matrix vector settings
730 DataType beta = cu_arithmetics::cast<float, DataType>(1.0f);
731
732 #ifndef CUDA_VERSION
733 #error CUDA_VERSION Undefined!
734 #elif CUDA_VERSION < 12000
735 // Using non-transpose operation since we treat CSC matrix as CSR
736 cusparseOperation_t cusparse_operation = \
737 CUSPARSE_OPERATION_NON_TRANSPOSE;
738 #else
739 cusparseOperation_t cusparse_operation = CUSPARSE_OPERATION_TRANSPOSE;
740 #endif
741
742 cusparseSpMVAlg_t algorithm = CUSPARSE_SPMV_ALG_DEFAULT;
743
744 // Get device id
745 int device_id = CudaAPI<DataType>::get_device();
746
747 // Allocate device buffer (or reallocation if needed)
748 this->allocate_buffer(device_id, cusparse_operation, alpha, beta,
749 cusparse_input_vector, cusparse_output_vector,
750 algorithm);
751
752 // Matrix vector multiplication
754 this->cusparse_handle[device_id], cusparse_operation, alpha,
755 this->cusparse_matrix_A[device_id], cusparse_input_vector, beta,
756 cusparse_output_vector, algorithm, this->device_buffer[device_id]);
757
758 // Destroy cusparse vectors
759 cusparse_api::destroy_cusparse_vector(cusparse_input_vector);
760 cusparse_api::destroy_cusparse_vector(cusparse_output_vector);
761}
762
763
764// ===============================
765// Explicit template instantiation
766// ===============================
767
768#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
769 template class cuCSCMatrix<__nv_fp8_e5m2>;
770#endif
771
772#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
773 template class cuCSCMatrix<__nv_fp8_e4m3>;
774#endif
775
776#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
777 template class cuCSCMatrix<__half>;
778#endif
779
780#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
781 template class cuCSCMatrix<__nv_bfloat16>;
782#endif
783
784#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
785 template class cuCSCMatrix<float>;
786#endif
787
788#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
789 template class cuCSCMatrix<double>;
790#endif
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Definition cuda_api.cu:191
static ArrayType * alloc(const size_t array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
Definition cuda_api.cu:39
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
Definition cuda_api.cu:169
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
Definition cuda_api.cu:118
static int get_device()
Gets the current device in multi-gpu applications.
Definition cuda_api.cu:209
static void copy_to_device(const ArrayType *host_array, const size_t array_size, ArrayType *device_array)
Copies memory on host to device memory.
Definition cuda_api.cu:145
Base class for cLinearOperator and cuLinearOperator . This class is not templated so that both cpp an...
Container for CSC matrices.
virtual void transpose_dot(const DataType *device_vector, DataType *device_product)
Transposed-matrix vector product.
virtual void transpose_dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
Transposed-matrix vector product written in place.
virtual void dot(const DataType *device_vector, DataType *device_product)
Matrix vector product.
LongIndexType get_nnz() const
Returns the number of non-zero elements of the sparse matrix.
virtual FlagType is_identity_matrix() const
Checks whether the matrix is identity.
size_t * device_buffer_num_bytes
cuCSCMatrix()
Default constructor.
virtual void dot_plus(const DataType *device_vector, const DataType alpha, DataType *device_product)
Matrix vector product written in place.
void ** device_buffer
virtual void copy_host_to_device()
Copies the member data from the host memory to the device memory.
void allocate_buffer(const int device_id, cusparseOperation_t cusparse_operation, const DataType alpha, const DataType beta, cusparseDnVecDescr_t &cusparse_input_vector, cusparseDnVecDescr_t &cusparse_output_vector, cusparseSpMVAlg_t algorithm)
Allocates an external buffer for matrix-vector multiplication using cusparseSpMV function.
virtual ~cuCSCMatrix()
Destructor.
Base class for linear operators. This class serves as interface for all derived classes.
void initialize_cusparse_handle()
Creates a cusparseHandle_t object, if not created already.
Base class for constant matrices.
Definition cu_matrix.h:45
void omp_set_num_threads(int num_threads)
int omp_get_thread_num()
#define CUSPARSE_SPMV_ALG_DEFAULT
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.
bool is_equal(DataType x, DataType y)
Check if two floating point numbers are equal within a tolerance.
void create_cusparse_csc_matrix(cusparseSpMatDescr_t &cusparse_matrix, const DataIndexType num_rows, const DataIndexType num_columns, const DataIndexType nnz, DataType *RESTRICT device_A_data, DataIndexType *RESTRICT device_A_indices, DataIndexType *RESTRICT device_A_index_pointer)
void cusparse_matvec(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
void destroy_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix)
Destroy cusparse matrix.
void create_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, DataType *RESTRICT device_vector)
void destroy_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector)
Destroys cusparse vector.
void cusparse_matrix_buffer_size(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const DataType alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const DataType beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)
void create_cusparse_csr_matrix(cusparseSpMatDescr_t &cusparse_matrix, const DataIndexType num_rows, const DataIndexType num_columns, const DataIndexType nnz, DataType *RESTRICT device_A_data, DataIndexType *RESTRICT device_A_indices, DataIndexType *RESTRICT device_A_index_pointer)
int LongIndexType
Definition types.h:60
int FlagType
Definition types.h:68