67 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
69 void create_cusparse_csr_matrix<__nv_fp8_e5m2, int32_t>(
70 cusparseSpMatDescr_t& cusparse_matrix,
71 const int32_t num_rows,
72 const int32_t num_columns,
76 int32_t*
RESTRICT device_A_index_pointer)
79 throw std::runtime_error(
"Function not implemented.");
115 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
117 void create_cusparse_csr_matrix<__nv_fp8_e5m2, int64_t>(
118 cusparseSpMatDescr_t& cusparse_matrix,
119 const int64_t num_rows,
120 const int64_t num_columns,
124 int64_t*
RESTRICT device_A_index_pointer)
127 throw std::runtime_error(
"Function not implemented.");
163 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
165 void create_cusparse_csr_matrix<__nv_fp8_e4m3, int32_t>(
166 cusparseSpMatDescr_t& cusparse_matrix,
167 const int32_t num_rows,
168 const int32_t num_columns,
172 int32_t*
RESTRICT device_A_index_pointer)
175 throw std::runtime_error(
"Function not implemented.");
211 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
213 void create_cusparse_csr_matrix<__nv_fp8_e4m3, int64_t>(
214 cusparseSpMatDescr_t& cusparse_matrix,
215 const int64_t num_rows,
216 const int64_t num_columns,
220 int64_t*
RESTRICT device_A_index_pointer)
223 throw std::runtime_error(
"Function not implemented.");
259 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
261 void create_cusparse_csr_matrix<__half, int32_t>(
262 cusparseSpMatDescr_t& cusparse_matrix,
263 const int32_t num_rows,
264 const int32_t num_columns,
268 int32_t*
RESTRICT device_A_index_pointer)
271 &cusparse_matrix, num_rows, num_columns, nnz,
272 device_A_index_pointer, device_A_indices, device_A_data,
273 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
274 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
276 assert(status == CUSPARSE_STATUS_SUCCESS);
312 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
314 void create_cusparse_csr_matrix<__half, int64_t>(
315 cusparseSpMatDescr_t& cusparse_matrix,
316 const int64_t num_rows,
317 const int64_t num_columns,
321 int64_t*
RESTRICT device_A_index_pointer)
324 &cusparse_matrix, num_rows, num_columns, nnz,
325 device_A_index_pointer, device_A_indices, device_A_data,
326 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
327 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
329 assert(status == CUSPARSE_STATUS_SUCCESS);
// Specialization of create_cusparse_csr_matrix for __nv_bfloat16 values and
// int32_t indices. The visible arguments describe both index arrays as
// CUSPARSE_INDEX_32I with CUSPARSE_INDEX_BASE_ZERO indexing.
365 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
367 void create_cusparse_csr_matrix<__nv_bfloat16, int32_t>(
368 cusparseSpMatDescr_t& cusparse_matrix,
369 const int32_t num_rows,
370 const int32_t num_columns,
372 __nv_bfloat16*
RESTRICT device_A_data,
374 int32_t*
RESTRICT device_A_index_pointer)
377 &cusparse_matrix, num_rows, num_columns, nnz,
378 device_A_index_pointer, device_A_indices, device_A_data,
379 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
// NOTE(review): the value type below is CUDA_R_16F (IEEE half), but
// device_A_data is __nv_bfloat16. The cudaDataType enumerator for bfloat16
// is CUDA_R_16BF; as written the values would be interpreted with the
// wrong 16-bit layout. Looks copied from the __half specialization --
// confirm and change to CUDA_R_16BF.
380 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
382 assert(status == CUSPARSE_STATUS_SUCCESS);
// Specialization of create_cusparse_csr_matrix for __nv_bfloat16 values and
// int64_t indices. The visible arguments describe both index arrays as
// CUSPARSE_INDEX_64I with CUSPARSE_INDEX_BASE_ZERO indexing.
418 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
420 void create_cusparse_csr_matrix<__nv_bfloat16, int64_t>(
421 cusparseSpMatDescr_t& cusparse_matrix,
422 const int64_t num_rows,
423 const int64_t num_columns,
425 __nv_bfloat16*
RESTRICT device_A_data,
427 int64_t*
RESTRICT device_A_index_pointer)
430 &cusparse_matrix, num_rows, num_columns, nnz,
431 device_A_index_pointer, device_A_indices, device_A_data,
432 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
// NOTE(review): the value type below is CUDA_R_16F (IEEE half), but
// device_A_data is __nv_bfloat16. The cudaDataType enumerator for bfloat16
// is CUDA_R_16BF; as written the values would be interpreted with the
// wrong 16-bit layout. Looks copied from the __half specialization --
// confirm and change to CUDA_R_16BF.
433 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
435 assert(status == CUSPARSE_STATUS_SUCCESS);
471 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
474 cusparseSpMatDescr_t& cusparse_matrix,
475 const int32_t num_rows,
476 const int32_t num_columns,
480 int32_t*
RESTRICT device_A_index_pointer)
483 &cusparse_matrix, num_rows, num_columns, nnz,
484 device_A_index_pointer, device_A_indices, device_A_data,
485 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
486 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
488 assert(status == CUSPARSE_STATUS_SUCCESS);
524 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
527 cusparseSpMatDescr_t& cusparse_matrix,
528 const int64_t num_rows,
529 const int64_t num_columns,
533 int64_t*
RESTRICT device_A_index_pointer)
536 &cusparse_matrix, num_rows, num_columns, nnz,
537 device_A_index_pointer, device_A_indices, device_A_data,
538 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
539 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
541 assert(status == CUSPARSE_STATUS_SUCCESS);
577 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
580 cusparseSpMatDescr_t& cusparse_matrix,
581 const int32_t num_rows,
582 const int32_t num_columns,
586 int32_t*
RESTRICT device_A_index_pointer)
589 &cusparse_matrix, num_rows, num_columns, nnz,
590 device_A_index_pointer, device_A_indices, device_A_data,
591 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
592 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
594 assert(status == CUSPARSE_STATUS_SUCCESS);
630 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
633 cusparseSpMatDescr_t& cusparse_matrix,
634 const int64_t num_rows,
635 const int64_t num_columns,
639 int64_t*
RESTRICT device_A_index_pointer)
642 &cusparse_matrix, num_rows, num_columns, nnz,
643 device_A_index_pointer, device_A_indices, device_A_data,
644 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
645 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
647 assert(status == CUSPARSE_STATUS_SUCCESS);
683 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
685 void create_cusparse_csc_matrix<__nv_fp8_e5m2, int32_t>(
686 cusparseSpMatDescr_t& cusparse_matrix,
687 const int32_t num_rows,
688 const int32_t num_columns,
692 int32_t*
RESTRICT device_A_index_pointer)
695 throw std::runtime_error(
"Function not implemented.");
731 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
733 void create_cusparse_csc_matrix<__nv_fp8_e5m2, int64_t>(
734 cusparseSpMatDescr_t& cusparse_matrix,
735 const int64_t num_rows,
736 const int64_t num_columns,
740 int64_t*
RESTRICT device_A_index_pointer)
743 throw std::runtime_error(
"Function not implemented.");
779 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
781 void create_cusparse_csc_matrix<__nv_fp8_e4m3, int32_t>(
782 cusparseSpMatDescr_t& cusparse_matrix,
783 const int32_t num_rows,
784 const int32_t num_columns,
788 int32_t*
RESTRICT device_A_index_pointer)
791 throw std::runtime_error(
"Function not implemented.");
827 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
829 void create_cusparse_csc_matrix<__nv_fp8_e4m3, int64_t>(
830 cusparseSpMatDescr_t& cusparse_matrix,
831 const int64_t num_rows,
832 const int64_t num_columns,
836 int64_t*
RESTRICT device_A_index_pointer)
839 throw std::runtime_error(
"Function not implemented.");
875 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
877 void create_cusparse_csc_matrix<__half, int32_t>(
878 cusparseSpMatDescr_t& cusparse_matrix,
879 const int32_t num_rows,
880 const int32_t num_columns,
884 int32_t*
RESTRICT device_A_index_pointer)
887 &cusparse_matrix, num_rows, num_columns, nnz,
888 device_A_index_pointer, device_A_indices, device_A_data,
889 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
890 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
892 assert(status == CUSPARSE_STATUS_SUCCESS);
928 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
930 void create_cusparse_csc_matrix<__half, int64_t>(
931 cusparseSpMatDescr_t& cusparse_matrix,
932 const int64_t num_rows,
933 const int64_t num_columns,
937 int64_t*
RESTRICT device_A_index_pointer)
940 &cusparse_matrix, num_rows, num_columns, nnz,
941 device_A_index_pointer, device_A_indices, device_A_data,
942 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
943 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
945 assert(status == CUSPARSE_STATUS_SUCCESS);
// Specialization of create_cusparse_csc_matrix for __nv_bfloat16 values and
// int32_t indices. The visible arguments describe both index arrays as
// CUSPARSE_INDEX_32I with CUSPARSE_INDEX_BASE_ZERO indexing.
981 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
983 void create_cusparse_csc_matrix<__nv_bfloat16, int32_t>(
984 cusparseSpMatDescr_t& cusparse_matrix,
985 const int32_t num_rows,
986 const int32_t num_columns,
988 __nv_bfloat16*
RESTRICT device_A_data,
990 int32_t*
RESTRICT device_A_index_pointer)
993 &cusparse_matrix, num_rows, num_columns, nnz,
994 device_A_index_pointer, device_A_indices, device_A_data,
995 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
// NOTE(review): the value type below is CUDA_R_16F (IEEE half), but
// device_A_data is __nv_bfloat16. The cudaDataType enumerator for bfloat16
// is CUDA_R_16BF; as written the values would be interpreted with the
// wrong 16-bit layout. Looks copied from the __half specialization --
// confirm and change to CUDA_R_16BF.
996 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
998 assert(status == CUSPARSE_STATUS_SUCCESS);
// Specialization of create_cusparse_csc_matrix for __nv_bfloat16 values and
// int64_t indices. The visible arguments describe both index arrays as
// CUSPARSE_INDEX_64I with CUSPARSE_INDEX_BASE_ZERO indexing.
1034 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1036 void create_cusparse_csc_matrix<__nv_bfloat16, int64_t>(
1037 cusparseSpMatDescr_t& cusparse_matrix,
1038 const int64_t num_rows,
1039 const int64_t num_columns,
1041 __nv_bfloat16*
RESTRICT device_A_data,
1042 int64_t*
RESTRICT device_A_indices,
1043 int64_t*
RESTRICT device_A_index_pointer)
1046 &cusparse_matrix, num_rows, num_columns, nnz,
1047 device_A_index_pointer, device_A_indices, device_A_data,
1048 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
// NOTE(review): the value type below is CUDA_R_16F (IEEE half), but
// device_A_data is __nv_bfloat16. The cudaDataType enumerator for bfloat16
// is CUDA_R_16BF; as written the values would be interpreted with the
// wrong 16-bit layout. Looks copied from the __half specialization --
// confirm and change to CUDA_R_16BF.
1049 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16F);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
1051 assert(status == CUSPARSE_STATUS_SUCCESS);
1087 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1090 cusparseSpMatDescr_t& cusparse_matrix,
1091 const int32_t num_rows,
1092 const int32_t num_columns,
1095 int32_t*
RESTRICT device_A_indices,
1096 int32_t*
RESTRICT device_A_index_pointer)
1099 &cusparse_matrix, num_rows, num_columns, nnz,
1100 device_A_index_pointer, device_A_indices, device_A_data,
1101 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
1102 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
1104 assert(status == CUSPARSE_STATUS_SUCCESS);
1140 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1143 cusparseSpMatDescr_t& cusparse_matrix,
1144 const int64_t num_rows,
1145 const int64_t num_columns,
1148 int64_t*
RESTRICT device_A_indices,
1149 int64_t*
RESTRICT device_A_index_pointer)
1152 &cusparse_matrix, num_rows, num_columns, nnz,
1153 device_A_index_pointer, device_A_indices, device_A_data,
1154 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
1155 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
1157 assert(status == CUSPARSE_STATUS_SUCCESS);
1193 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1196 cusparseSpMatDescr_t& cusparse_matrix,
1197 const int32_t num_rows,
1198 const int32_t num_columns,
1201 int32_t*
RESTRICT device_A_indices,
1202 int32_t*
RESTRICT device_A_index_pointer)
1205 &cusparse_matrix, num_rows, num_columns, nnz,
1206 device_A_index_pointer, device_A_indices, device_A_data,
1207 CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
1208 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1210 assert(status == CUSPARSE_STATUS_SUCCESS);
1246 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1249 cusparseSpMatDescr_t& cusparse_matrix,
1250 const int64_t num_rows,
1251 const int64_t num_columns,
1254 int64_t*
RESTRICT device_A_indices,
1255 int64_t*
RESTRICT device_A_index_pointer)
1258 &cusparse_matrix, num_rows, num_columns, nnz,
1259 device_A_index_pointer, device_A_indices, device_A_data,
1260 CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
1261 CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F);
1263 assert(status == CUSPARSE_STATUS_SUCCESS);
1290 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1292 void create_cusparse_vector<__nv_fp8_e5m2>(
1293 cusparseDnVecDescr_t& cusparse_vector,
1298 throw std::runtime_error(
"Function not implemented.");
1325 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
1327 void create_cusparse_vector<__nv_fp8_e4m3>(
1328 cusparseDnVecDescr_t& cusparse_vector,
1333 throw std::runtime_error(
"Function not implemented.");
1360 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
1362 void create_cusparse_vector<__half>(
1363 cusparseDnVecDescr_t& cusparse_vector,
1368 &cusparse_vector, vector_size, device_vector, CUDA_R_16F);
1370 assert(status == CUSPARSE_STATUS_SUCCESS);
// Specialization of create_cusparse_vector for __nv_bfloat16 data: the
// visible call arguments pass the descriptor, vector_size and device_vector
// together with a cudaDataType tag.
1397 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1399 void create_cusparse_vector<__nv_bfloat16>(
1400 cusparseDnVecDescr_t& cusparse_vector,
1402 __nv_bfloat16*
RESTRICT device_vector)
// NOTE(review): the value type below is CUDA_R_16F (IEEE half), but
// device_vector is __nv_bfloat16. The cudaDataType enumerator for bfloat16
// is CUDA_R_16BF -- confirm and change to CUDA_R_16BF so the vector
// descriptor matches the data it points at.
1405 &cusparse_vector, vector_size, device_vector, CUDA_R_16F);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
1407 assert(status == CUSPARSE_STATUS_SUCCESS);
1434 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1437 cusparseDnVecDescr_t& cusparse_vector,
1442 &cusparse_vector, vector_size, device_vector, CUDA_R_32F);
1444 assert(status == CUSPARSE_STATUS_SUCCESS);
1471 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1474 cusparseDnVecDescr_t& cusparse_vector,
1479 &cusparse_vector, vector_size, device_vector, CUDA_R_64F);
1481 assert(status == CUSPARSE_STATUS_SUCCESS);
1502 cusparseSpMatDescr_t& cusparse_matrix)
1505 assert(status == CUSPARSE_STATUS_SUCCESS);
1524 cusparseDnVecDescr_t& cusparse_vector)
1527 assert(status == CUSPARSE_STATUS_SUCCESS);
1573 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1575 void cusparse_matrix_buffer_size<__nv_fp8_e5m2>(
1576 cusparseHandle_t cusparse_handle,
1577 cusparseOperation_t cusparse_operation,
1579 cusparseSpMatDescr_t cusparse_matrix,
1580 cusparseDnVecDescr_t cusparse_input_vector,
1582 cusparseDnVecDescr_t cusparse_output_vector,
1583 cusparseSpMVAlg_t algorithm,
1584 size_t* buffer_size)
1587 throw std::runtime_error(
"Function not implemented.");
1634 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
1636 void cusparse_matrix_buffer_size<__nv_fp8_e4m3>(
1637 cusparseHandle_t cusparse_handle,
1638 cusparseOperation_t cusparse_operation,
1640 cusparseSpMatDescr_t cusparse_matrix,
1641 cusparseDnVecDescr_t cusparse_input_vector,
1643 cusparseDnVecDescr_t cusparse_output_vector,
1644 cusparseSpMVAlg_t algorithm,
1645 size_t* buffer_size)
1648 throw std::runtime_error(
"Function not implemented.");
// Specialization of cusparse_matrix_buffer_size for __half. The visible call
// arguments forward the handle, operation, scalars, descriptors, algorithm
// and the buffer_size output pointer, using CUDA_R_32F as the compute type.
1694 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
1696 void cusparse_matrix_buffer_size<__half>(
1697 cusparseHandle_t cusparse_handle,
1698 cusparseOperation_t cusparse_operation,
1700 cusparseSpMatDescr_t cusparse_matrix,
1701 cusparseDnVecDescr_t cusparse_input_vector,
1703 cusparseDnVecDescr_t cusparse_output_vector,
1704 cusparseSpMVAlg_t algorithm,
1705 size_t* buffer_size)
// NOTE(review): the declarations of alpha and beta are not among the visible
// lines. cuSPARSE reads these scalars through pointers to the compute type,
// which is CUDA_R_32F (float) here. If they are declared as __half (the
// __nv_bfloat16 specialization declares them as __nv_bfloat16), &alpha and
// &beta point at 2-byte objects that would be read as 4-byte floats --
// TODO confirm and convert to float before the call.
1708 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1709 cusparse_input_vector, &beta, cusparse_output_vector,
1710 CUDA_R_32F, algorithm, buffer_size);
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
1712 assert(status == CUSPARSE_STATUS_SUCCESS);
#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
/// \brief Queries the size of the external workspace that cuSPARSE needs for
///        a bfloat16 sparse matrix-vector product.
///
/// \param[in]  cusparse_handle        cuSPARSE library handle.
/// \param[in]  cusparse_operation     Operation applied to the matrix.
/// \param[in]  alpha                  Scalar multiplying the matrix-vector
///                                    product.
/// \param[in]  cusparse_matrix        Sparse matrix descriptor.
/// \param[in]  cusparse_input_vector  Dense input vector descriptor.
/// \param[in]  beta                   Scalar multiplying the output vector.
/// \param[in]  cusparse_output_vector Dense output vector descriptor.
/// \param[in]  algorithm              SpMV algorithm selector.
/// \param[out] buffer_size            Receives the workspace size in bytes.
template<>
void cusparse_matrix_buffer_size<__nv_bfloat16>(
        cusparseHandle_t cusparse_handle,
        cusparseOperation_t cusparse_operation,
        const __nv_bfloat16 alpha,
        cusparseSpMatDescr_t cusparse_matrix,
        cusparseDnVecDescr_t cusparse_input_vector,
        const __nv_bfloat16 beta,
        cusparseDnVecDescr_t cusparse_output_vector,
        cusparseSpMVAlg_t algorithm,
        size_t* buffer_size)
{
    // cuSPARSE reads alpha and beta through pointers to the *compute* type.
    // The compute type is CUDA_R_32F, so the scalars must be floats; handing
    // over the address of a 2-byte bfloat16 would be read as 4 bytes and
    // misinterpreted.
    const float alpha_compute = __bfloat162float(alpha);
    const float beta_compute = __bfloat162float(beta);

    cusparseStatus_t status = cusparseSpMV_bufferSize(
            cusparse_handle, cusparse_operation, &alpha_compute,
            cusparse_matrix, cusparse_input_vector, &beta_compute,
            cusparse_output_vector, CUDA_R_32F, algorithm, buffer_size);

    assert(status == CUSPARSE_STATUS_SUCCESS);
}
#endif
1818 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
1821 cusparseHandle_t cusparse_handle,
1822 cusparseOperation_t cusparse_operation,
1824 cusparseSpMatDescr_t cusparse_matrix,
1825 cusparseDnVecDescr_t cusparse_input_vector,
1827 cusparseDnVecDescr_t cusparse_output_vector,
1828 cusparseSpMVAlg_t algorithm,
1829 size_t* buffer_size)
1832 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1833 cusparse_input_vector, &beta, cusparse_output_vector,
1834 CUDA_R_32F, algorithm, buffer_size);
1836 assert(status == CUSPARSE_STATUS_SUCCESS);
1878 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
1881 cusparseHandle_t cusparse_handle,
1882 cusparseOperation_t cusparse_operation,
1884 cusparseSpMatDescr_t cusparse_matrix,
1885 cusparseDnVecDescr_t cusparse_input_vector,
1887 cusparseDnVecDescr_t cusparse_output_vector,
1888 cusparseSpMVAlg_t algorithm,
1889 size_t* buffer_size)
1892 cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
1893 cusparse_input_vector, &beta, cusparse_output_vector,
1894 CUDA_R_64F, algorithm, buffer_size);
1896 assert(status == CUSPARSE_STATUS_SUCCESS);
1941 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1943 void cusparse_matvec<__nv_fp8_e5m2>(
1944 cusparseHandle_t cusparse_handle,
1945 cusparseOperation_t cusparse_operation,
1947 cusparseSpMatDescr_t cusparse_matrix,
1948 cusparseDnVecDescr_t cusparse_input_vector,
1950 cusparseDnVecDescr_t cusparse_output_vector,
1951 cusparseSpMVAlg_t algorithm,
1952 void* external_buffer)
1955 throw std::runtime_error(
"Function not implemented.");
2000 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
2002 void cusparse_matvec<__nv_fp8_e4m3>(
2003 cusparseHandle_t cusparse_handle,
2004 cusparseOperation_t cusparse_operation,
2006 cusparseSpMatDescr_t cusparse_matrix,
2007 cusparseDnVecDescr_t cusparse_input_vector,
2009 cusparseDnVecDescr_t cusparse_output_vector,
2010 cusparseSpMVAlg_t algorithm,
2011 void* external_buffer)
2014 throw std::runtime_error(
"Function not implemented.");
// Specialization of cusparse_matvec for __half: calls cusparseSpMV with
// CUDA_R_32F as the compute type.
2059 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
2061 void cusparse_matvec<__half>(
2062 cusparseHandle_t cusparse_handle,
2063 cusparseOperation_t cusparse_operation,
2065 cusparseSpMatDescr_t cusparse_matrix,
2066 cusparseDnVecDescr_t cusparse_input_vector,
2068 cusparseDnVecDescr_t cusparse_output_vector,
2069 cusparseSpMVAlg_t algorithm,
2070 void* external_buffer)
// NOTE(review): the declarations of alpha and beta are not among the visible
// lines. cusparseSpMV reads these scalars through pointers to the compute
// type, which is CUDA_R_32F (float) here. If they are declared as __half
// (the __nv_bfloat16 specialization declares them as __nv_bfloat16), the
// product would be scaled by misread values -- TODO confirm and convert to
// float before the call.
2072 cusparseStatus_t status =
cusparseSpMV(cusparse_handle,
2073 cusparse_operation, &alpha,
2075 cusparse_input_vector, &beta,
2076 cusparse_output_vector,
2077 CUDA_R_32F, algorithm,
// NOTE(review): the status is only checked through assert, so a failure
// goes undetected when NDEBUG is defined.
2080 assert(status == CUSPARSE_STATUS_SUCCESS);
#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
/// \brief Computes the bfloat16 sparse matrix-vector product
///        y = alpha * op(A) * x + beta * y with cusparseSpMV.
///
/// \param[in]     cusparse_handle        cuSPARSE library handle.
/// \param[in]     cusparse_operation     Operation applied to the matrix.
/// \param[in]     alpha                  Scalar multiplying the
///                                       matrix-vector product.
/// \param[in]     cusparse_matrix        Sparse matrix descriptor (A).
/// \param[in]     cusparse_input_vector  Dense input vector descriptor (x).
/// \param[in]     beta                   Scalar multiplying the output
///                                       vector.
/// \param[in,out] cusparse_output_vector Dense output vector descriptor (y).
/// \param[in]     algorithm              SpMV algorithm selector.
/// \param[in]     external_buffer        Workspace passed through to
///                                       cusparseSpMV.
template<>
void cusparse_matvec<__nv_bfloat16>(
        cusparseHandle_t cusparse_handle,
        cusparseOperation_t cusparse_operation,
        const __nv_bfloat16 alpha,
        cusparseSpMatDescr_t cusparse_matrix,
        cusparseDnVecDescr_t cusparse_input_vector,
        const __nv_bfloat16 beta,
        cusparseDnVecDescr_t cusparse_output_vector,
        cusparseSpMVAlg_t algorithm,
        void* external_buffer)
{
    // cusparseSpMV reads alpha and beta through pointers to the *compute*
    // type. The compute type is CUDA_R_32F, so the scalars must be floats;
    // handing over the address of a 2-byte bfloat16 would be read as 4 bytes
    // and scale the product by a garbage value.
    const float alpha_compute = __bfloat162float(alpha);
    const float beta_compute = __bfloat162float(beta);

    cusparseStatus_t status = cusparseSpMV(
            cusparse_handle, cusparse_operation, &alpha_compute,
            cusparse_matrix, cusparse_input_vector, &beta_compute,
            cusparse_output_vector, CUDA_R_32F, algorithm, external_buffer);

    assert(status == CUSPARSE_STATUS_SUCCESS);
}
#endif
2187 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
2190 cusparseHandle_t cusparse_handle,
2191 cusparseOperation_t cusparse_operation,
2193 cusparseSpMatDescr_t cusparse_matrix,
2194 cusparseDnVecDescr_t cusparse_input_vector,
2196 cusparseDnVecDescr_t cusparse_output_vector,
2197 cusparseSpMVAlg_t algorithm,
2198 void* external_buffer)
2200 cusparseStatus_t status =
cusparseSpMV(cusparse_handle,
2201 cusparse_operation, &alpha,
2203 cusparse_input_vector, &beta,
2204 cusparse_output_vector,
2205 CUDA_R_32F, algorithm,
2208 assert(status == CUSPARSE_STATUS_SUCCESS);
2249 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
2252 cusparseHandle_t cusparse_handle,
2253 cusparseOperation_t cusparse_operation,
2255 cusparseSpMatDescr_t cusparse_matrix,
2256 cusparseDnVecDescr_t cusparse_input_vector,
2258 cusparseDnVecDescr_t cusparse_output_vector,
2259 cusparseSpMVAlg_t algorithm,
2260 void* external_buffer)
2262 cusparseStatus_t status =
cusparseSpMV(cusparse_handle,
2263 cusparse_operation, &alpha,
2265 cusparse_input_vector, &beta,
2266 cusparse_output_vector,
2267 CUDA_R_64F, algorithm,
2270 assert(status == CUSPARSE_STATUS_SUCCESS);