imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cusparse_api.cu
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11
12// =======
13// Headers
14// =======
15
16#include "./cusparse_api.h"
17#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,
18 // __half, __nv_bfloat16
19#include <cassert> // assert
20#include <stdexcept> // std::runtime_error
21
22
23// ============
24// cusparse api
25// ============
26
32
33namespace cusparse_api
34{
35
36 // ==========================
37 // create cusparse csr matrix (__nv_fp8_e5m2, int32_t)
38 // ==========================
39
66
67 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
68 template<>
69 void create_cusparse_csr_matrix<__nv_fp8_e5m2, int32_t>(
70 cusparseSpMatDescr_t& cusparse_matrix,
71 const int32_t num_rows,
72 const int32_t num_columns,
73 const int32_t nnz,
74 __nv_fp8_e5m2* RESTRICT device_A_data,
75 int32_t* RESTRICT device_A_indices,
76 int32_t* RESTRICT device_A_index_pointer)
77 {
78 // TODO
79 throw std::runtime_error("Function not implemented.");
80 }
81 #endif
82
83
84 // ==========================
85 // create cusparse csr matrix (__nv_fp8_e5m2, int64_t)
86 // ==========================
87
114
115 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
116 template<>
117 void create_cusparse_csr_matrix<__nv_fp8_e5m2, int64_t>(
118 cusparseSpMatDescr_t& cusparse_matrix,
119 const int64_t num_rows,
120 const int64_t num_columns,
121 const int64_t nnz,
122 __nv_fp8_e5m2* RESTRICT device_A_data,
123 int64_t* RESTRICT device_A_indices,
124 int64_t* RESTRICT device_A_index_pointer)
125 {
126 // TODO
127 throw std::runtime_error("Function not implemented.");
128 }
129 #endif
130
131
132 // ==========================
133 // create cusparse csr matrix (__nv_fp8_e4m3, int32_t)
134 // ==========================
135
162
163 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
164 template<>
165 void create_cusparse_csr_matrix<__nv_fp8_e4m3, int32_t>(
166 cusparseSpMatDescr_t& cusparse_matrix,
167 const int32_t num_rows,
168 const int32_t num_columns,
169 const int32_t nnz,
170 __nv_fp8_e4m3* RESTRICT device_A_data,
171 int32_t* RESTRICT device_A_indices,
172 int32_t* RESTRICT device_A_index_pointer)
173 {
174 // TODO
175 throw std::runtime_error("Function not implemented.");
176 }
177 #endif
178
179
180 // ==========================
181 // create cusparse csr matrix (__nv_fp8_e4m3, int64_t)
182 // ==========================
183
210
211 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
212 template<>
213 void create_cusparse_csr_matrix<__nv_fp8_e4m3, int64_t>(
214 cusparseSpMatDescr_t& cusparse_matrix,
215 const int64_t num_rows,
216 const int64_t num_columns,
217 const int64_t nnz,
218 __nv_fp8_e4m3* RESTRICT device_A_data,
219 int64_t* RESTRICT device_A_indices,
220 int64_t* RESTRICT device_A_index_pointer)
221 {
222 // TODO
223 throw std::runtime_error("Function not implemented.");
224 }
225 #endif
226
227
228 // ==========================
229 // create cusparse csr matrix (__half, int32_t)
230 // ==========================
231
258
259 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
260 template<>
261 void create_cusparse_csr_matrix<__half, int32_t>(
262 cusparseSpMatDescr_t& cusparse_matrix,
263 const int32_t num_rows,
264 const int32_t num_columns,
265 const int32_t nnz,
266 __half* RESTRICT device_A_data,
267 int32_t* RESTRICT device_A_indices,
268 int32_t* RESTRICT device_A_index_pointer)
269 {
270 cusparseStatus_t status = cusparseCreateCsr(
271 &cusparse_matrix, num_rows, num_columns, nnz,
272 device_A_index_pointer, device_A_indices, device_A_data,
273 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
274 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
275
276 assert(status == CUSPARSE_STATUS_SUCCESS);
277 }
278 #endif
279
280
281 // ==========================
282 // create cusparse csr matrix (__half, int64_t)
283 // ==========================
284
311
312 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
313 template<>
314 void create_cusparse_csr_matrix<__half, int64_t>(
315 cusparseSpMatDescr_t& cusparse_matrix,
316 const int64_t num_rows,
317 const int64_t num_columns,
318 const int64_t nnz,
319 __half* RESTRICT device_A_data,
320 int64_t* RESTRICT device_A_indices,
321 int64_t* RESTRICT device_A_index_pointer)
322 {
323 cusparseStatus_t status = cusparseCreateCsr(
324 &cusparse_matrix, num_rows, num_columns, nnz,
325 device_A_index_pointer, device_A_indices, device_A_data,
326 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
327 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
328
329 assert(status == CUSPARSE_STATUS_SUCCESS);
330 }
331 #endif
332
333
334 // ==========================
335 // create cusparse csr matrix (__nv_bfloat16, int32_t)
336 // ==========================
337
364
365 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
366 template<>
367 void create_cusparse_csr_matrix<__nv_bfloat16, int32_t>(
368 cusparseSpMatDescr_t& cusparse_matrix,
369 const int32_t num_rows,
370 const int32_t num_columns,
371 const int32_t nnz,
372 __nv_bfloat16* RESTRICT device_A_data,
373 int32_t* RESTRICT device_A_indices,
374 int32_t* RESTRICT device_A_index_pointer)
375 {
376 cusparseStatus_t status = cusparseCreateCsr(
377 &cusparse_matrix, num_rows, num_columns, nnz,
378 device_A_index_pointer, device_A_indices, device_A_data,
379 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
380 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
381
382 assert(status == CUSPARSE_STATUS_SUCCESS);
383 }
384 #endif
385
386
387 // ==========================
388 // create cusparse csr matrix (__nv_bfloat16, int64_t)
389 // ==========================
390
417
418 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
419 template<>
420 void create_cusparse_csr_matrix<__nv_bfloat16, int64_t>(
421 cusparseSpMatDescr_t& cusparse_matrix,
422 const int64_t num_rows,
423 const int64_t num_columns,
424 const int64_t nnz,
425 __nv_bfloat16* RESTRICT device_A_data,
426 int64_t* RESTRICT device_A_indices,
427 int64_t* RESTRICT device_A_index_pointer)
428 {
429 cusparseStatus_t status = cusparseCreateCsr(
430 &cusparse_matrix, num_rows, num_columns, nnz,
431 device_A_index_pointer, device_A_indices, device_A_data,
432 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
433 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
434
435 assert(status == CUSPARSE_STATUS_SUCCESS);
436 }
437 #endif
438
439
440 // ==========================
441 // create cusparse csr matrix (float, int32_t)
442 // ==========================
443
470
471 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
472 template<>
474 cusparseSpMatDescr_t& cusparse_matrix,
475 const int32_t num_rows,
476 const int32_t num_columns,
477 const int32_t nnz,
478 float* RESTRICT device_A_data,
479 int32_t* RESTRICT device_A_indices,
480 int32_t* RESTRICT device_A_index_pointer)
481 {
482 cusparseStatus_t status = cusparseCreateCsr(
483 &cusparse_matrix, num_rows, num_columns, nnz,
484 device_A_index_pointer, device_A_indices, device_A_data,
485 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
486 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
487
488 assert(status == CUSPARSE_STATUS_SUCCESS);
489 }
490 #endif
491
492
493 // ==========================
494 // create cusparse csr matrix (float, int64_t)
495 // ==========================
496
523
524 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
525 template<>
527 cusparseSpMatDescr_t& cusparse_matrix,
528 const int64_t num_rows,
529 const int64_t num_columns,
530 const int64_t nnz,
531 float* RESTRICT device_A_data,
532 int64_t* RESTRICT device_A_indices,
533 int64_t* RESTRICT device_A_index_pointer)
534 {
535 cusparseStatus_t status = cusparseCreateCsr(
536 &cusparse_matrix, num_rows, num_columns, nnz,
537 device_A_index_pointer, device_A_indices, device_A_data,
538 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
539 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
540
541 assert(status == CUSPARSE_STATUS_SUCCESS);
542 }
543 #endif
544
545
546 // ==========================
547 // create cusparse csr matrix (double, int32_t)
548 // ==========================
549
576
577 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
578 template<>
580 cusparseSpMatDescr_t& cusparse_matrix,
581 const int32_t num_rows,
582 const int32_t num_columns,
583 const int32_t nnz,
584 double* RESTRICT device_A_data,
585 int32_t* RESTRICT device_A_indices,
586 int32_t* RESTRICT device_A_index_pointer)
587 {
588 cusparseStatus_t status = cusparseCreateCsr(
589 &cusparse_matrix, num_rows, num_columns, nnz,
590 device_A_index_pointer, device_A_indices, device_A_data,
591 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
592 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
593
594 assert(status == CUSPARSE_STATUS_SUCCESS);
595 }
596 #endif
597
598
599 // ==========================
600 // create cusparse csr matrix (double, int64_t)
601 // ==========================
602
629
630 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
631 template<>
633 cusparseSpMatDescr_t& cusparse_matrix,
634 const int64_t num_rows,
635 const int64_t num_columns,
636 const int64_t nnz,
637 double* RESTRICT device_A_data,
638 int64_t* RESTRICT device_A_indices,
639 int64_t* RESTRICT device_A_index_pointer)
640 {
641 cusparseStatus_t status = cusparseCreateCsr(
642 &cusparse_matrix, num_rows, num_columns, nnz,
643 device_A_index_pointer, device_A_indices, device_A_data,
644 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
645 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
646
647 assert(status == CUSPARSE_STATUS_SUCCESS);
648 }
649 #endif
650
651
652 // ==========================
653 // create cusparse csc matrix (__nv_fp8_e5m2, int32_t)
654 // ==========================
655
682
683 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
684 template<>
685 void create_cusparse_csc_matrix<__nv_fp8_e5m2, int32_t>(
686 cusparseSpMatDescr_t& cusparse_matrix,
687 const int32_t num_rows,
688 const int32_t num_columns,
689 const int32_t nnz,
690 __nv_fp8_e5m2* RESTRICT device_A_data,
691 int32_t* RESTRICT device_A_indices,
692 int32_t* RESTRICT device_A_index_pointer)
693 {
694 // TODO
695 throw std::runtime_error("Function not implemented.");
696 }
697 #endif
698
699
700 // ==========================
701 // create cusparse csc matrix (__nv_fp8_e5m2, int64_t)
702 // ==========================
703
730
731 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
732 template<>
733 void create_cusparse_csc_matrix<__nv_fp8_e5m2, int64_t>(
734 cusparseSpMatDescr_t& cusparse_matrix,
735 const int64_t num_rows,
736 const int64_t num_columns,
737 const int64_t nnz,
738 __nv_fp8_e5m2* RESTRICT device_A_data,
739 int64_t* RESTRICT device_A_indices,
740 int64_t* RESTRICT device_A_index_pointer)
741 {
742 // TODO
743 throw std::runtime_error("Function not implemented.");
744 }
745 #endif
746
747
748 // ==========================
749 // create cusparse csc matrix (__nv_fp8_e4m3, int32_t)
750 // ==========================
751
778
779 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
780 template<>
781 void create_cusparse_csc_matrix<__nv_fp8_e4m3, int32_t>(
782 cusparseSpMatDescr_t& cusparse_matrix,
783 const int32_t num_rows,
784 const int32_t num_columns,
785 const int32_t nnz,
786 __nv_fp8_e4m3* RESTRICT device_A_data,
787 int32_t* RESTRICT device_A_indices,
788 int32_t* RESTRICT device_A_index_pointer)
789 {
790 // TODO
791 throw std::runtime_error("Function not implemented.");
792 }
793 #endif
794
795
796 // ==========================
797 // create cusparse csc matrix (__nv_fp8_e4m3, int64_t)
798 // ==========================
799
826
827 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
828 template<>
829 void create_cusparse_csc_matrix<__nv_fp8_e4m3, int64_t>(
830 cusparseSpMatDescr_t& cusparse_matrix,
831 const int64_t num_rows,
832 const int64_t num_columns,
833 const int64_t nnz,
834 __nv_fp8_e4m3* RESTRICT device_A_data,
835 int64_t* RESTRICT device_A_indices,
836 int64_t* RESTRICT device_A_index_pointer)
837 {
838 // TODO
839 throw std::runtime_error("Function not implemented.");
840 }
841 #endif
842
843
844 // ==========================
845 // create cusparse csc matrix (__half, int32_t)
846 // ==========================
847
874
875 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
876 template<>
877 void create_cusparse_csc_matrix<__half, int32_t>(
878 cusparseSpMatDescr_t& cusparse_matrix,
879 const int32_t num_rows,
880 const int32_t num_columns,
881 const int32_t nnz,
882 __half* RESTRICT device_A_data,
883 int32_t* RESTRICT device_A_indices,
884 int32_t* RESTRICT device_A_index_pointer)
885 {
886 cusparseStatus_t status = cusparseCreateCsc(
887 &cusparse_matrix, num_rows, num_columns, nnz,
888 device_A_index_pointer, device_A_indices, device_A_data,
889 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
890 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
891
892 assert(status == CUSPARSE_STATUS_SUCCESS);
893 }
894 #endif
895
896
897 // ==========================
898 // create cusparse csc matrix (__half, int64_t)
899 // ==========================
900
927
928 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
929 template<>
930 void create_cusparse_csc_matrix<__half, int64_t>(
931 cusparseSpMatDescr_t& cusparse_matrix,
932 const int64_t num_rows,
933 const int64_t num_columns,
934 const int64_t nnz,
935 __half* RESTRICT device_A_data,
936 int64_t* RESTRICT device_A_indices,
937 int64_t* RESTRICT device_A_index_pointer)
938 {
939 cusparseStatus_t status = cusparseCreateCsc(
940 &cusparse_matrix, num_rows, num_columns, nnz,
941 device_A_index_pointer, device_A_indices, device_A_data,
942 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
943 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
944
945 assert(status == CUSPARSE_STATUS_SUCCESS);
946 }
947 #endif
948
949
950 // ==========================
951 // create cusparse csc matrix (__nv_bfloat16, int32_t)
952 // ==========================
953
980
981 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
982 template<>
983 void create_cusparse_csc_matrix<__nv_bfloat16, int32_t>(
984 cusparseSpMatDescr_t& cusparse_matrix,
985 const int32_t num_rows,
986 const int32_t num_columns,
987 const int32_t nnz,
988 __nv_bfloat16* RESTRICT device_A_data,
989 int32_t* RESTRICT device_A_indices,
990 int32_t* RESTRICT device_A_index_pointer)
991 {
992 cusparseStatus_t status = cusparseCreateCsc(
993 &cusparse_matrix, num_rows, num_columns, nnz,
994 device_A_index_pointer, device_A_indices, device_A_data,
995 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
996 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
997
998 assert(status == CUSPARSE_STATUS_SUCCESS);
999 }
1000 #endif
1001
1002
1003 // ==========================
1004 // create cusparse csc matrix (__nv_bfloat16, int64_t)
1005 // ==========================
1006
1033
1034 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1035 template<>
1036 void create_cusparse_csc_matrix<__nv_bfloat16, int64_t>(
1037 cusparseSpMatDescr_t& cusparse_matrix,
1038 const int64_t num_rows,
1039 const int64_t num_columns,
1040 const int64_t nnz,
1041 __nv_bfloat16* RESTRICT device_A_data,
1042 int64_t* RESTRICT device_A_indices,
1043 int64_t* RESTRICT device_A_index_pointer)
1044 {
1045 cusparseStatus_t status = cusparseCreateCsc(
1046 &cusparse_matrix, num_rows, num_columns, nnz,
1047 device_A_index_pointer, device_A_indices, device_A_data,
1048 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
1049 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
1050
1051 assert(status == CUSPARSE_STATUS_SUCCESS);
1052 }
1053 #endif
1054
1055
1056 // ==========================
1057 // create cusparse csc matrix (float, int32_t)
1058 // ==========================
1059
1086
1087 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1088 template<>
1090 cusparseSpMatDescr_t& cusparse_matrix,
1091 const int32_t num_rows,
1092 const int32_t num_columns,
1093 const int32_t nnz,
1094 float* RESTRICT device_A_data,
1095 int32_t* RESTRICT device_A_indices,
1096 int32_t* RESTRICT device_A_index_pointer)
1097 {
1098 cusparseStatus_t status = cusparseCreateCsc(
1099 &cusparse_matrix, num_rows, num_columns, nnz,
1100 device_A_index_pointer, device_A_indices, device_A_data,
1101 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
1102 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
1103
1104 assert(status == CUSPARSE_STATUS_SUCCESS);
1105 }
1106 #endif
1107
1108
1109 // ==========================
1110 // create cusparse csc matrix (float, int64_t)
1111 // ==========================
1112
1139
1140 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1141 template<>
1143 cusparseSpMatDescr_t& cusparse_matrix,
1144 const int64_t num_rows,
1145 const int64_t num_columns,
1146 const int64_t nnz,
1147 float* RESTRICT device_A_data,
1148 int64_t* RESTRICT device_A_indices,
1149 int64_t* RESTRICT device_A_index_pointer)
1150 {
1151 cusparseStatus_t status = cusparseCreateCsc(
1152 &cusparse_matrix, num_rows, num_columns, nnz,
1153 device_A_index_pointer, device_A_indices, device_A_data,
1154 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
1155 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
1156
1157 assert(status == CUSPARSE_STATUS_SUCCESS);
1158 }
1159 #endif
1160
1161
1162 // ==========================
1163 // create cusparse csc matrix (double, int32_t)
1164 // ==========================
1165
1192
1193 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1194 template<>
1196 cusparseSpMatDescr_t& cusparse_matrix,
1197 const int32_t num_rows,
1198 const int32_t num_columns,
1199 const int32_t nnz,
1200 double* RESTRICT device_A_data,
1201 int32_t* RESTRICT device_A_indices,
1202 int32_t* RESTRICT device_A_index_pointer)
1203 {
1204 cusparseStatus_t status = cusparseCreateCsc(
1205 &cusparse_matrix, num_rows, num_columns, nnz,
1206 device_A_index_pointer, device_A_indices, device_A_data,
1207 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
1208 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1209
1210 assert(status == CUSPARSE_STATUS_SUCCESS);
1211 }
1212 #endif
1213
1214
1215 // ==========================
1216 // create cusparse csc matrix (double, int64_t)
1217 // ==========================
1218
1245
1246 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1247 template<>
1249 cusparseSpMatDescr_t& cusparse_matrix,
1250 const int64_t num_rows,
1251 const int64_t num_columns,
1252 const int64_t nnz,
1253 double* RESTRICT device_A_data,
1254 int64_t* RESTRICT device_A_indices,
1255 int64_t* RESTRICT device_A_index_pointer)
1256 {
1257 cusparseStatus_t status = cusparseCreateCsc(
1258 &cusparse_matrix, num_rows, num_columns, nnz,
1259 device_A_index_pointer, device_A_indices, device_A_data,
1260 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
1261 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1262
1263 assert(status == CUSPARSE_STATUS_SUCCESS);
1264 }
1265 #endif
1266
1267
1268 // ======================
1269 // create cusparse vector (__nv_fp8_e5m2)
1270 // ======================
1271
1289
1290 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1291 template<>
1292 void create_cusparse_vector<__nv_fp8_e5m2>(
1293 cusparseDnVecDescr_t& cusparse_vector,
1294 const LongIndexType vector_size,
1295 __nv_fp8_e5m2* RESTRICT device_vector)
1296 {
1297 // TODO
1298 throw std::runtime_error("Function not implemented.");
1299 }
1300 #endif
1301
1302
1303 // ======================
1304 // create cusparse vector (__nv_fp8_e4m3)
1305 // ======================
1306
1324
1325 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
1326 template<>
1327 void create_cusparse_vector<__nv_fp8_e4m3>(
1328 cusparseDnVecDescr_t& cusparse_vector,
1329 const LongIndexType vector_size,
1330 __nv_fp8_e4m3* RESTRICT device_vector)
1331 {
1332 // TODO
1333 throw std::runtime_error("Function not implemented.");
1334 }
1335 #endif
1336
1337
1338 // ======================
1339 // create cusparse vector (__half)
1340 // ======================
1341
1359
1360 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
1361 template<>
1362 void create_cusparse_vector<__half>(
1363 cusparseDnVecDescr_t& cusparse_vector,
1364 const LongIndexType vector_size,
1365 __half* RESTRICT device_vector)
1366 {
1367 cusparseStatus_t status = cusparseCreateDnVec(
1368 &cusparse_vector, vector_size, device_vector, CUDA_R_16F);
1369
1370 assert(status == CUSPARSE_STATUS_SUCCESS);
1371 }
1372 #endif
1373
1374
1375 // ======================
1376 // create cusparse vector (__nv_bfloat16)
1377 // ======================
1378
1396
1397 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1398 template<>
1399 void create_cusparse_vector<__nv_bfloat16>(
1400 cusparseDnVecDescr_t& cusparse_vector,
1401 const LongIndexType vector_size,
1402 __nv_bfloat16* RESTRICT device_vector)
1403 {
1404 cusparseStatus_t status = cusparseCreateDnVec(
1405 &cusparse_vector, vector_size, device_vector, CUDA_R_16F);
1406
1407 assert(status == CUSPARSE_STATUS_SUCCESS);
1408 }
1409 #endif
1410
1411
1412 // ======================
1413 // create cusparse vector (float)
1414 // ======================
1415
1433
1434 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1435 template<>
1437 cusparseDnVecDescr_t& cusparse_vector,
1438 const LongIndexType vector_size,
1439 float* RESTRICT device_vector)
1440 {
1441 cusparseStatus_t status = cusparseCreateDnVec(
1442 &cusparse_vector, vector_size, device_vector, CUDA_R_32F);
1443
1444 assert(status == CUSPARSE_STATUS_SUCCESS);
1445 }
1446 #endif
1447
1448
1449 // ======================
1450 // create cusparse vector (double)
1451 // ======================
1452
1470
1471 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1472 template<>
1474 cusparseDnVecDescr_t& cusparse_vector,
1475 const LongIndexType vector_size,
1476 double* RESTRICT device_vector)
1477 {
1478 cusparseStatus_t status = cusparseCreateDnVec(
1479 &cusparse_vector, vector_size, device_vector, CUDA_R_64F);
1480
1481 assert(status == CUSPARSE_STATUS_SUCCESS);
1482 }
1483 #endif
1484
1485
1486 // =======================
1487 // destroy cusparse matrix
1488 // =======================
1489
1500
1502 cusparseSpMatDescr_t& cusparse_matrix)
1503 {
1504 cusparseStatus_t status = cusparseDestroySpMat(cusparse_matrix);
1505 assert(status == CUSPARSE_STATUS_SUCCESS);
1506 }
1507
1508
1509 // =======================
1510 // destroy cusparse vector
1511 // =======================
1512
1522
1524 cusparseDnVecDescr_t& cusparse_vector)
1525 {
1526 cusparseStatus_t status = cusparseDestroyDnVec(cusparse_vector);
1527 assert(status == CUSPARSE_STATUS_SUCCESS);
1528 }
1529
1530
1531 // ===========================
1532 // cusparse matrix buffer size (__nv_fp8_e5m2)
1533 // ===========================
1534
1572
1573 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1574 template<>
1575 void cusparse_matrix_buffer_size<__nv_fp8_e5m2>(
1576 cusparseHandle_t cusparse_handle,
1577 cusparseOperation_t cusparse_operation,
1578 const __nv_fp8_e5m2 alpha,
1579 cusparseSpMatDescr_t cusparse_matrix,
1580 cusparseDnVecDescr_t cusparse_input_vector,
1581 const __nv_fp8_e5m2 beta,
1582 cusparseDnVecDescr_t cusparse_output_vector,
1583 cusparseSpMVAlg_t algorithm,
1584 size_t* buffer_size)
1585 {
1586 // TODO
1587 throw std::runtime_error("Function not implemented.");
1588 }
1589 #endif
1590
1591
1592 // ===========================
1593 // cusparse matrix buffer size (__nv_fp8_e4m3)
1594 // ===========================
1595
1633
1634 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
1635 template<>
1636 void cusparse_matrix_buffer_size<__nv_fp8_e4m3>(
1637 cusparseHandle_t cusparse_handle,
1638 cusparseOperation_t cusparse_operation,
1639 const __nv_fp8_e4m3 alpha,
1640 cusparseSpMatDescr_t cusparse_matrix,
1641 cusparseDnVecDescr_t cusparse_input_vector,
1642 const __nv_fp8_e4m3 beta,
1643 cusparseDnVecDescr_t cusparse_output_vector,
1644 cusparseSpMVAlg_t algorithm,
1645 size_t* buffer_size)
1646 {
1647 // TODO
1648 throw std::runtime_error("Function not implemented.");
1649 }
1650 #endif
1651
1652
1653 // ===========================
1654 // cusparse matrix buffer size (__half)
1655 // ===========================
1656
1693
1694 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
1695 template<>
1696 void cusparse_matrix_buffer_size<__half>(
1697 cusparseHandle_t cusparse_handle,
1698 cusparseOperation_t cusparse_operation,
1699 const __half alpha,
1700 cusparseSpMatDescr_t cusparse_matrix,
1701 cusparseDnVecDescr_t cusparse_input_vector,
1702 const __half beta,
1703 cusparseDnVecDescr_t cusparse_output_vector,
1704 cusparseSpMVAlg_t algorithm,
1705 size_t* buffer_size)
1706 {
1707 cusparseStatus_t status = cusparseSpMV_bufferSize(
1708 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1709 cusparse_input_vector, &beta, cusparse_output_vector,
1710 CUDA_R_32F, algorithm, buffer_size);
1711
1712 assert(status == CUSPARSE_STATUS_SUCCESS);
1713 }
1714 #endif
1715
1716 // ===========================
1717 // cusparse matrix buffer size (__nv_bfloat16)
1718 // ===========================
1719
1757
1758 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1759 template<>
1760 void cusparse_matrix_buffer_size<__nv_bfloat16>(
1761 cusparseHandle_t cusparse_handle,
1762 cusparseOperation_t cusparse_operation,
1763 const __nv_bfloat16 alpha,
1764 cusparseSpMatDescr_t cusparse_matrix,
1765 cusparseDnVecDescr_t cusparse_input_vector,
1766 const __nv_bfloat16 beta,
1767 cusparseDnVecDescr_t cusparse_output_vector,
1768 cusparseSpMVAlg_t algorithm,
1769 size_t* buffer_size)
1770 {
1771 cusparseStatus_t status = cusparseSpMV_bufferSize(
1772 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1773 cusparse_input_vector, &beta, cusparse_output_vector,
1774 CUDA_R_32F, algorithm, buffer_size);
1775
1776 assert(status == CUSPARSE_STATUS_SUCCESS);
1777 }
1778 #endif
1779
1780
1781 // ===========================
1782 // cusparse matrix buffer size (float)
1783 // ===========================
1784
1817
1818 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1819 template<>
1821 cusparseHandle_t cusparse_handle,
1822 cusparseOperation_t cusparse_operation,
1823 const float alpha,
1824 cusparseSpMatDescr_t cusparse_matrix,
1825 cusparseDnVecDescr_t cusparse_input_vector,
1826 const float beta,
1827 cusparseDnVecDescr_t cusparse_output_vector,
1828 cusparseSpMVAlg_t algorithm,
1829 size_t* buffer_size)
1830 {
1831 cusparseStatus_t status = cusparseSpMV_bufferSize(
1832 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1833 cusparse_input_vector, &beta, cusparse_output_vector,
1834 CUDA_R_32F, algorithm, buffer_size);
1835
1836 assert(status == CUSPARSE_STATUS_SUCCESS);
1837 }
1838 #endif
1839
1840
1841 // ===========================
1842 // cusparse matrix buffer size (double)
1843 // ===========================
1844
1877
1878 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1879 template<>
1881 cusparseHandle_t cusparse_handle,
1882 cusparseOperation_t cusparse_operation,
1883 const double alpha,
1884 cusparseSpMatDescr_t cusparse_matrix,
1885 cusparseDnVecDescr_t cusparse_input_vector,
1886 const double beta,
1887 cusparseDnVecDescr_t cusparse_output_vector,
1888 cusparseSpMVAlg_t algorithm,
1889 size_t* buffer_size)
1890 {
1891 cusparseStatus_t status = cusparseSpMV_bufferSize(
1892 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1893 cusparse_input_vector, &beta, cusparse_output_vector,
1894 CUDA_R_64F, algorithm, buffer_size);
1895
1896 assert(status == CUSPARSE_STATUS_SUCCESS);
1897 }
1898 #endif
1899
1900
1901 // ===============
1902 // cusparse matvec (__nv_fp8_e5m2)
1903 // ===============
1904
1940
1941 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1942 template<>
1943 void cusparse_matvec<__nv_fp8_e5m2>(
1944 cusparseHandle_t cusparse_handle,
1945 cusparseOperation_t cusparse_operation,
1946 const __nv_fp8_e5m2 alpha,
1947 cusparseSpMatDescr_t cusparse_matrix,
1948 cusparseDnVecDescr_t cusparse_input_vector,
1949 const __nv_fp8_e5m2 beta,
1950 cusparseDnVecDescr_t cusparse_output_vector,
1951 cusparseSpMVAlg_t algorithm,
1952 void* external_buffer)
1953 {
1954 // TODO
1955 throw std::runtime_error("Function not implemented.");
1956 }
1957 #endif
1958
1959
1960 // ===============
1961 // cusparse matvec (__nv_fp8_e4m3)
1962 // ===============
1963
1999
2000 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
2001 template<>
2002 void cusparse_matvec<__nv_fp8_e4m3>(
2003 cusparseHandle_t cusparse_handle,
2004 cusparseOperation_t cusparse_operation,
2005 const __nv_fp8_e4m3 alpha,
2006 cusparseSpMatDescr_t cusparse_matrix,
2007 cusparseDnVecDescr_t cusparse_input_vector,
2008 const __nv_fp8_e4m3 beta,
2009 cusparseDnVecDescr_t cusparse_output_vector,
2010 cusparseSpMVAlg_t algorithm,
2011 void* external_buffer)
2012 {
2013 // TODO
2014 throw std::runtime_error("Function not implemented.");
2015 }
2016 #endif
2017
2018
2019 // ===============
2020 // cusparse matvec (__half)
2021 // ===============
2022
2058
2059 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
2060 template<>
2061 void cusparse_matvec<__half>(
2062 cusparseHandle_t cusparse_handle,
2063 cusparseOperation_t cusparse_operation,
2064 const __half alpha,
2065 cusparseSpMatDescr_t cusparse_matrix,
2066 cusparseDnVecDescr_t cusparse_input_vector,
2067 const __half beta,
2068 cusparseDnVecDescr_t cusparse_output_vector,
2069 cusparseSpMVAlg_t algorithm,
2070 void* external_buffer)
2071 {
2072 cusparseStatus_t status = cusparseSpMV(cusparse_handle,
2073 cusparse_operation, &alpha,
2074 cusparse_matrix,
2075 cusparse_input_vector, &beta,
2076 cusparse_output_vector,
2077 CUDA_R_32F, algorithm,
2078 external_buffer);
2079
2080 assert(status == CUSPARSE_STATUS_SUCCESS);
2081 }
2082 #endif
2083
2084
2085 // ===============
2086 // cusparse matvec (__nv_bfloat16)
2087 // ===============
2088
2124
2125 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
2126 template<>
2127 void cusparse_matvec<__nv_bfloat16>(
2128 cusparseHandle_t cusparse_handle,
2129 cusparseOperation_t cusparse_operation,
2130 const __nv_bfloat16 alpha,
2131 cusparseSpMatDescr_t cusparse_matrix,
2132 cusparseDnVecDescr_t cusparse_input_vector,
2133 const __nv_bfloat16 beta,
2134 cusparseDnVecDescr_t cusparse_output_vector,
2135 cusparseSpMVAlg_t algorithm,
2136 void* external_buffer)
2137 {
2138 cusparseStatus_t status = cusparseSpMV(cusparse_handle,
2139 cusparse_operation, &alpha,
2140 cusparse_matrix,
2141 cusparse_input_vector, &beta,
2142 cusparse_output_vector,
2143 CUDA_R_32F, algorithm,
2144 external_buffer);
2145
2146 assert(status == CUSPARSE_STATUS_SUCCESS);
2147 }
2148 #endif
2149
2150
2151 // ===============
2152 // cusparse matvec (float)
2153 // ===============
2154
2186
2187 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
2188 template<>
2190 cusparseHandle_t cusparse_handle,
2191 cusparseOperation_t cusparse_operation,
2192 const float alpha,
2193 cusparseSpMatDescr_t cusparse_matrix,
2194 cusparseDnVecDescr_t cusparse_input_vector,
2195 const float beta,
2196 cusparseDnVecDescr_t cusparse_output_vector,
2197 cusparseSpMVAlg_t algorithm,
2198 void* external_buffer)
2199 {
2200 cusparseStatus_t status = cusparseSpMV(cusparse_handle,
2201 cusparse_operation, &alpha,
2202 cusparse_matrix,
2203 cusparse_input_vector, &beta,
2204 cusparse_output_vector,
2205 CUDA_R_32F, algorithm,
2206 external_buffer);
2207
2208 assert(status == CUSPARSE_STATUS_SUCCESS);
2209 }
2210 #endif
2211
2212
2213 // ===============
2214 // cusparse matvec (double)
2215 // ===============
2216
2248
2249 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
2250 template<>
2252 cusparseHandle_t cusparse_handle,
2253 cusparseOperation_t cusparse_operation,
2254 const double alpha,
2255 cusparseSpMatDescr_t cusparse_matrix,
2256 cusparseDnVecDescr_t cusparse_input_vector,
2257 const double beta,
2258 cusparseDnVecDescr_t cusparse_output_vector,
2259 cusparseSpMVAlg_t algorithm,
2260 void* external_buffer)
2261 {
2262 cusparseStatus_t status = cusparseSpMV(cusparse_handle,
2263 cusparse_operation, &alpha,
2264 cusparse_matrix,
2265 cusparse_input_vector, &beta,
2266 cusparse_output_vector,
2267 CUDA_R_64F, algorithm,
2268 external_buffer);
2269
2270 assert(status == CUSPARSE_STATUS_SUCCESS);
2271 }
2272 #endif
2273
2274} // namespace cusparse_api
#define RESTRICT
cusparseStatus_t cusparseSpMV(cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, void *externalBuffer)
Definition of CUDA's cusparseSpMV function using dynamically loaded cusparse library.
cusparseStatus_t cusparseCreateCsc(cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void *csrRowOffsets, void *csrColInd, void *csrValues, cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, cudaDataType valueType)
Definition of CUDA's cusparseCreateCsc function using dynamically loaded cusparse library.
cusparseStatus_t cusparseDestroySpMat(cusparseConstSpMatDescr_t spMatDescr)
Definition of CUDA's cusparseDestroySpMat function using dynamically loaded cusparse library.
cusparseStatus_t cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, void *values, cudaDataType valueType)
Definition of CUDA's cusparseCreateDnVec function using dynamically loaded cusparse library.
cusparseStatus_t cusparseSpMV_bufferSize(cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, size_t *bufferSize)
Definition of CUDA's cusparseSpMV_bufferSize function using dynamically loaded cusparse library.
cusparseStatus_t cusparseCreateCsr(cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void *csrRowOffsets, void *csrColInd, void *csrValues, cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, cudaDataType valueType)
Definition of CUDA's cusparseCreateCsr function using dynamically loaded cusparse library.
cusparseStatus_t cusparseDestroyDnVec(cusparseConstDnVecDescr_t dnVecDescr)
Definition of CUDA's cusparseDestroyDnVec function using dynamically loaded cusparse library.
A collection of templates to wrap cusparse functions.
void create_cusparse_csc_matrix< double, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, double *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the double precision data and int64_t index type.
void create_cusparse_csr_matrix< double, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, double *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the double precision data and int32_t index type.
void cusparse_matrix_buffer_size< float >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const float alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const float beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)
A template wrapper for cusparseSpMV_bufferSize for float precision data. This function determ...
void cusparse_matvec< float >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const float alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const float beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
A wrapper for cusparseSpMV to perform sparse matrix-vector multiplication using float precision data...
void create_cusparse_csc_matrix< float, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, float *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the float precision data and int32_t index type.
void create_cusparse_csr_matrix< double, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, double *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the double precision data and int64_t index type.
void destroy_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix)
Destroy cusparse matrix.
void create_cusparse_csc_matrix< double, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, double *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the double precision data and int32_t index type.
void cusparse_matrix_buffer_size< double >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const double alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const double beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)
A template wrapper for cusparseSpMV_bufferSize for double precision data. This function determines th...
void create_cusparse_csc_matrix< float, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, float *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the float precision data and int64_t index type.
void create_cusparse_vector< float >(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, float *RESTRICT device_vector)
A template wrapper for cusparseCreateDnVec for the float precision data.
void cusparse_matvec< double >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const double alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const double beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
A wrapper for cusparseSpMV to perform sparse matrix-vector multiplication using double precision data...
void create_cusparse_csr_matrix< float, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, float *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the float precision data and int32_t index type.
void destroy_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector)
Destroys cusparse vector.
void create_cusparse_vector< double >(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, double *RESTRICT device_vector)
A template wrapper for cusparseCreateDnVec for the double precision data.
void create_cusparse_csr_matrix< float, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, float *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the float precision data and int64_t index type.
int LongIndexType
Definition types.h:60