// doxygen/html/cusparse__api_8cu_source.html

/*

 *  SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>

 *  SPDX-License-Identifier: BSD-3-Clause

 *  SPDX-FileType: SOURCE

 *

 *  This program is free software: you can redistribute it and/or modify it

 *  under the terms of the license found in the LICENSE.txt file in the root

 *  directory of this source tree.

 */


// =======

// Headers

// =======


#include "./cusparse_api.h"

#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,

                                         // __half, __nv_bfloat16

#include <cassert>  // assert

#include <stdexcept>  // std::runtime_error


// ============

// cusparse api

// ============


namespace cusparse_api

{


    // ==========================

    // create cusparse csr matrix (__nv_fp8_e5m2, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 values with 32-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csr_matrix<__nv_fp8_e5m2, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_fp8_e5m2* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSR descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__nv_fp8_e5m2, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 values with 64-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csr_matrix<__nv_fp8_e5m2, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_fp8_e5m2* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSR descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__nv_fp8_e4m3, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 values with 32-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csr_matrix<__nv_fp8_e4m3, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_fp8_e4m3* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSR descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__nv_fp8_e4m3, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 values with 64-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csr_matrix<__nv_fp8_e4m3, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_fp8_e4m3* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSR descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__half, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Creates a CSR sparse-matrix descriptor for __half values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_16F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<__half, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __half* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the row offsets
                CUSPARSE_INDEX_32I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_16F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__half, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Creates a CSR sparse-matrix descriptor for __half values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_16F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<__half, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __half* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the row offsets
                CUSPARSE_INDEX_64I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_16F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__nv_bfloat16, int32_t)

    // ==========================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Creates a CSR sparse-matrix descriptor for __nv_bfloat16 values with
    // 32-bit indices by forwarding the arguments to cusparseCreateCsr
    // (zero-based indexing). The new descriptor is written to
    // cusparse_matrix.
    //
    // Parameters:
    //   cusparse_matrix         Receives the created descriptor.
    //   num_rows, num_columns   Matrix dimensions.
    //   nnz                     Number of stored non-zero entries.
    //   device_A_data           Non-zero values (bfloat16).
    //   device_A_indices        Column indices of the non-zero values.
    //   device_A_index_pointer  Row offsets.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<__nv_bfloat16, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_bfloat16* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // The value type must be CUDA_R_16BF for bfloat16 data. CUDA_R_16F
        // denotes IEEE half precision, which has a different bit layout and
        // would make cuSPARSE misinterpret the stored values.
        cusparseStatus_t status = cusparseCreateCsr(
                &cusparse_matrix, num_rows, num_columns, nnz,
                device_A_index_pointer, device_A_indices, device_A_data,
                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
                CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16BF);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (__nv_bfloat16, int64_t)

    // ==========================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Creates a CSR sparse-matrix descriptor for __nv_bfloat16 values with
    // 64-bit indices by forwarding the arguments to cusparseCreateCsr
    // (zero-based indexing). The new descriptor is written to
    // cusparse_matrix.
    //
    // Parameters:
    //   cusparse_matrix         Receives the created descriptor.
    //   num_rows, num_columns   Matrix dimensions.
    //   nnz                     Number of stored non-zero entries.
    //   device_A_data           Non-zero values (bfloat16).
    //   device_A_indices        Column indices of the non-zero values.
    //   device_A_index_pointer  Row offsets.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<__nv_bfloat16, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_bfloat16* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // The value type must be CUDA_R_16BF for bfloat16 data. CUDA_R_16F
        // denotes IEEE half precision, which has a different bit layout and
        // would make cuSPARSE misinterpret the stored values.
        cusparseStatus_t status = cusparseCreateCsr(
                &cusparse_matrix, num_rows, num_columns, nnz,
                device_A_index_pointer, device_A_indices, device_A_data,
                CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
                CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16BF);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csr matrix (float, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Creates a CSR sparse-matrix descriptor for float values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_32F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<float, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            float* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the row offsets
                CUSPARSE_INDEX_32I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_32F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csr matrix (float, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Creates a CSR sparse-matrix descriptor for float values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_32F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<float, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            float* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the row offsets
                CUSPARSE_INDEX_64I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_32F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csr matrix (double, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Creates a CSR sparse-matrix descriptor for double values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_64F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<double, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            double* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the row offsets
                CUSPARSE_INDEX_32I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_64F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csr matrix (double, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Creates a CSR sparse-matrix descriptor for double values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsr (zero-based
    // indexing, CUDA_R_64F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csr_matrix<double, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            double* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsr(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // row offsets
                device_A_indices,          // column indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the row offsets
                CUSPARSE_INDEX_64I,        // type of the column indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_64F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csc matrix (__nv_fp8_e5m2, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 values with 32-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csc_matrix<__nv_fp8_e5m2, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_fp8_e5m2* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSC descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__nv_fp8_e5m2, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 values with 64-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csc_matrix<__nv_fp8_e5m2, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_fp8_e5m2* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSC descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__nv_fp8_e4m3, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 values with 32-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csc_matrix<__nv_fp8_e4m3, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_fp8_e4m3* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSC descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__nv_fp8_e4m3, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 values with 64-bit
    // indices. It is not implemented: none of the arguments are used and
    // every call throws std::runtime_error.
    template<>
    void create_cusparse_csc_matrix<__nv_fp8_e4m3, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_fp8_e4m3* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // TODO: create the CSC descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__half, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Creates a CSC sparse-matrix descriptor for __half values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_16F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<__half, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __half* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the column offsets
                CUSPARSE_INDEX_32I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_16F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__half, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Creates a CSC sparse-matrix descriptor for __half values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_16F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<__half, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __half* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the column offsets
                CUSPARSE_INDEX_64I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_16F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__nv_bfloat16, int32_t)

    // ==========================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Creates a CSC sparse-matrix descriptor for __nv_bfloat16 values with
    // 32-bit indices by forwarding the arguments to cusparseCreateCsc
    // (zero-based indexing). The new descriptor is written to
    // cusparse_matrix.
    //
    // Parameters:
    //   cusparse_matrix         Receives the created descriptor.
    //   num_rows, num_columns   Matrix dimensions.
    //   nnz                     Number of stored non-zero entries.
    //   device_A_data           Non-zero values (bfloat16).
    //   device_A_indices        Row indices of the non-zero values.
    //   device_A_index_pointer  Column offsets.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<__nv_bfloat16, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            __nv_bfloat16* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        // The value type must be CUDA_R_16BF for bfloat16 data. CUDA_R_16F
        // denotes IEEE half precision, which has a different bit layout and
        // would make cuSPARSE misinterpret the stored values.
        cusparseStatus_t status = cusparseCreateCsc(
                &cusparse_matrix, num_rows, num_columns, nnz,
                device_A_index_pointer, device_A_indices, device_A_data,
                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
                CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16BF);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (__nv_bfloat16, int64_t)

    // ==========================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Creates a CSC sparse-matrix descriptor for __nv_bfloat16 values with
    // 64-bit indices by forwarding the arguments to cusparseCreateCsc
    // (zero-based indexing). The new descriptor is written to
    // cusparse_matrix.
    //
    // Parameters:
    //   cusparse_matrix         Receives the created descriptor.
    //   num_rows, num_columns   Matrix dimensions.
    //   nnz                     Number of stored non-zero entries.
    //   device_A_data           Non-zero values (bfloat16).
    //   device_A_indices        Row indices of the non-zero values.
    //   device_A_index_pointer  Column offsets.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<__nv_bfloat16, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            __nv_bfloat16* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        // The value type must be CUDA_R_16BF for bfloat16 data. CUDA_R_16F
        // denotes IEEE half precision, which has a different bit layout and
        // would make cuSPARSE misinterpret the stored values.
        cusparseStatus_t status = cusparseCreateCsc(
                &cusparse_matrix, num_rows, num_columns, nnz,
                device_A_index_pointer, device_A_indices, device_A_data,
                CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
                CUSPARSE_INDEX_BASE_ZERO, CUDA_R_16BF);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ==========================

    // create cusparse csc matrix (float, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Creates a CSC sparse-matrix descriptor for float values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_32F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<float, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            float* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the column offsets
                CUSPARSE_INDEX_32I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_32F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csc matrix (float, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Creates a CSC sparse-matrix descriptor for float values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_32F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<float, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            float* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the column offsets
                CUSPARSE_INDEX_64I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_32F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csc matrix (double, int32_t)

    // ==========================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Creates a CSC sparse-matrix descriptor for double values with 32-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_64F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<double, int32_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int32_t num_rows,
            const int32_t num_columns,
            const int32_t nnz,
            double* RESTRICT device_A_data,
            int32_t* RESTRICT device_A_indices,
            int32_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_32I,        // type of the column offsets
                CUSPARSE_INDEX_32I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_64F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ==========================

    // create cusparse csc matrix (double, int64_t)

    // ==========================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Creates a CSC sparse-matrix descriptor for double values with 64-bit
    // indices by forwarding the arguments to cusparseCreateCsc (zero-based
    // indexing, CUDA_R_64F value type). The new descriptor is written to
    // cusparse_matrix.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_csc_matrix<double, int64_t>(
            cusparseSpMatDescr_t& cusparse_matrix,
            const int64_t num_rows,
            const int64_t num_columns,
            const int64_t nnz,
            double* RESTRICT device_A_data,
            int64_t* RESTRICT device_A_indices,
            int64_t* RESTRICT device_A_index_pointer)
    {
        const cusparseStatus_t create_status = cusparseCreateCsc(
                &cusparse_matrix,
                num_rows,
                num_columns,
                nnz,
                device_A_index_pointer,    // column offsets
                device_A_indices,          // row indices
                device_A_data,             // non-zero values
                CUSPARSE_INDEX_64I,        // type of the column offsets
                CUSPARSE_INDEX_64I,        // type of the row indices
                CUSPARSE_INDEX_BASE_ZERO,
                CUDA_R_64F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ======================

    // create cusparse vector (__nv_fp8_e5m2)

    // ======================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 vectors. It is not
    // implemented: none of the arguments are used and every call throws
    // std::runtime_error.
    template<>
    void create_cusparse_vector<__nv_fp8_e5m2>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            __nv_fp8_e5m2* RESTRICT device_vector)
    {
        // TODO: create the dense-vector descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ======================

    // create cusparse vector (__nv_fp8_e4m3)

    // ======================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 vectors. It is not
    // implemented: none of the arguments are used and every call throws
    // std::runtime_error.
    template<>
    void create_cusparse_vector<__nv_fp8_e4m3>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            __nv_fp8_e4m3* RESTRICT device_vector)
    {
        // TODO: create the dense-vector descriptor for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ======================

    // create cusparse vector (__half)

    // ======================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Creates a dense-vector descriptor for __half data by forwarding the
    // arguments to cusparseCreateDnVec with the CUDA_R_16F value type. The
    // new descriptor is written to cusparse_vector.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_vector<__half>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            __half* RESTRICT device_vector)
    {
        const cusparseStatus_t create_status = cusparseCreateDnVec(
                &cusparse_vector,
                vector_size,
                device_vector,
                CUDA_R_16F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ======================

    // create cusparse vector (__nv_bfloat16)

    // ======================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Creates a dense-vector descriptor for __nv_bfloat16 data by forwarding
    // the arguments to cusparseCreateDnVec. The new descriptor is written to
    // cusparse_vector.
    //
    // Parameters:
    //   cusparse_vector  Receives the created descriptor.
    //   vector_size      Number of elements of the vector.
    //   device_vector    Values of the vector (bfloat16).
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_vector<__nv_bfloat16>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            __nv_bfloat16* RESTRICT device_vector)
    {
        // The value type must be CUDA_R_16BF for bfloat16 data. CUDA_R_16F
        // denotes IEEE half precision, which has a different bit layout and
        // would make cuSPARSE misinterpret the stored values.
        cusparseStatus_t status = cusparseCreateDnVec(
                &cusparse_vector, vector_size, device_vector, CUDA_R_16BF);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ======================

    // create cusparse vector (float)

    // ======================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Creates a dense-vector descriptor for float data by forwarding the
    // arguments to cusparseCreateDnVec with the CUDA_R_32F value type. The
    // new descriptor is written to cusparse_vector.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_vector<float>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            float* RESTRICT device_vector)
    {
        const cusparseStatus_t create_status = cusparseCreateDnVec(
                &cusparse_vector,
                vector_size,
                device_vector,
                CUDA_R_32F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ======================

    // create cusparse vector (double)

    // ======================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Creates a dense-vector descriptor for double data by forwarding the
    // arguments to cusparseCreateDnVec with the CUDA_R_64F value type. The
    // new descriptor is written to cusparse_vector.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void create_cusparse_vector<double>(
            cusparseDnVecDescr_t& cusparse_vector,
            const LongIndexType vector_size,
            double* RESTRICT device_vector)
    {
        const cusparseStatus_t create_status = cusparseCreateDnVec(
                &cusparse_vector,
                vector_size,
                device_vector,
                CUDA_R_64F);

        assert(create_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // =======================

    // destroy cusparse matrix

    // =======================


    // Releases a sparse-matrix descriptor by passing it to
    // cusparseDestroySpMat. The cusparse_matrix variable itself is not
    // reset afterwards.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    void destroy_cusparse_matrix(
            cusparseSpMatDescr_t& cusparse_matrix)
    {
        const cusparseStatus_t destroy_status =
            cusparseDestroySpMat(cusparse_matrix);

        assert(destroy_status == CUSPARSE_STATUS_SUCCESS);
    }


    // =======================

    // destroy cusparse vector

    // =======================


    // Releases a dense-vector descriptor by passing it to
    // cusparseDestroyDnVec. The cusparse_vector variable itself is not
    // reset afterwards.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    void destroy_cusparse_vector(
            cusparseDnVecDescr_t& cusparse_vector)
    {
        const cusparseStatus_t destroy_status =
            cusparseDestroyDnVec(cusparse_vector);

        assert(destroy_status == CUSPARSE_STATUS_SUCCESS);
    }


    // ===========================

    // cusparse matrix buffer size (__nv_fp8_e5m2)

    // ===========================


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 data. It is not
    // implemented: none of the arguments are used, buffer_size is not
    // written, and every call throws std::runtime_error.
    template<>
    void cusparse_matrix_buffer_size<__nv_fp8_e5m2>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_fp8_e5m2 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_fp8_e5m2 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        // TODO: query the SpMV workspace size for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ===========================

    // cusparse matrix buffer size (__nv_fp8_e4m3)

    // ===========================


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 data. It is not
    // implemented: none of the arguments are used, buffer_size is not
    // written, and every call throws std::runtime_error.
    template<>
    void cusparse_matrix_buffer_size<__nv_fp8_e4m3>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_fp8_e4m3 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_fp8_e4m3 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        // TODO: query the SpMV workspace size for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ===========================

    // cusparse matrix buffer size (__half)

    // ===========================


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Queries the workspace size needed by cusparseSpMV for __half data by
    // calling cusparseSpMV_bufferSize with the CUDA_R_32F compute type. The
    // result is written to *buffer_size.
    //
    // Parameters:
    //   cusparse_handle         cuSPARSE library handle.
    //   cusparse_operation      Operation applied to the matrix.
    //   alpha, beta             Scalars of the matrix-vector product.
    //   cusparse_matrix         Sparse-matrix descriptor.
    //   cusparse_input_vector   Dense descriptor of the input vector.
    //   cusparse_output_vector  Dense descriptor of the output vector.
    //   algorithm               SpMV algorithm selector.
    //   buffer_size             Receives the required size.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void cusparse_matrix_buffer_size<__half>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __half alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __half beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        // cuSPARSE reads the scalars through their pointers using the
        // compute type, which is CUDA_R_32F here. Pointing it at 2-byte
        // __half objects would make it read past them, so the scalars are
        // widened to float first.
        const float alpha_compute = __half2float(alpha);
        const float beta_compute = __half2float(beta);

        cusparseStatus_t status = cusparseSpMV_bufferSize(
                cusparse_handle, cusparse_operation, &alpha_compute,
                cusparse_matrix, cusparse_input_vector, &beta_compute,
                cusparse_output_vector, CUDA_R_32F, algorithm, buffer_size);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ===========================

    // cusparse matrix buffer size (__nv_bfloat16)

    // ===========================


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Queries the workspace size needed by cusparseSpMV for __nv_bfloat16
    // data by calling cusparseSpMV_bufferSize with the CUDA_R_32F compute
    // type. The result is written to *buffer_size.
    //
    // Parameters:
    //   cusparse_handle         cuSPARSE library handle.
    //   cusparse_operation      Operation applied to the matrix.
    //   alpha, beta             Scalars of the matrix-vector product.
    //   cusparse_matrix         Sparse-matrix descriptor.
    //   cusparse_input_vector   Dense descriptor of the input vector.
    //   cusparse_output_vector  Dense descriptor of the output vector.
    //   algorithm               SpMV algorithm selector.
    //   buffer_size             Receives the required size.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void cusparse_matrix_buffer_size<__nv_bfloat16>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_bfloat16 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_bfloat16 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        // cuSPARSE reads the scalars through their pointers using the
        // compute type, which is CUDA_R_32F here. Pointing it at 2-byte
        // __nv_bfloat16 objects would make it read past them, so the
        // scalars are widened to float first.
        const float alpha_compute = __bfloat162float(alpha);
        const float beta_compute = __bfloat162float(beta);

        cusparseStatus_t status = cusparseSpMV_bufferSize(
                cusparse_handle, cusparse_operation, &alpha_compute,
                cusparse_matrix, cusparse_input_vector, &beta_compute,
                cusparse_output_vector, CUDA_R_32F, algorithm, buffer_size);

        assert(status == CUSPARSE_STATUS_SUCCESS);
    }

    #endif


    // ===========================

    // cusparse matrix buffer size (float)

    // ===========================


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Queries the workspace size needed by cusparseSpMV for float data by
    // calling cusparseSpMV_bufferSize with the CUDA_R_32F compute type. The
    // result is written to *buffer_size.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void cusparse_matrix_buffer_size<float>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const float alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const float beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        const cusparseStatus_t query_status = cusparseSpMV_bufferSize(
                cusparse_handle,
                cusparse_operation,
                &alpha,
                cusparse_matrix,
                cusparse_input_vector,
                &beta,
                cusparse_output_vector,
                CUDA_R_32F,             // compute type
                algorithm,
                buffer_size);

        assert(query_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ===========================

    // cusparse matrix buffer size (double)

    // ===========================


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Queries the workspace size needed by cusparseSpMV for double data by
    // calling cusparseSpMV_bufferSize with the CUDA_R_64F compute type. The
    // result is written to *buffer_size.
    //
    // The returned status is verified with assert only, so the check is
    // compiled out when NDEBUG is defined.
    template<>
    void cusparse_matrix_buffer_size<double>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const double alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const double beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            size_t* buffer_size)
    {
        const cusparseStatus_t query_status = cusparseSpMV_bufferSize(
                cusparse_handle,
                cusparse_operation,
                &alpha,
                cusparse_matrix,
                cusparse_input_vector,
                &beta,
                cusparse_output_vector,
                CUDA_R_64F,             // compute type
                algorithm,
                buffer_size);

        assert(query_status == CUSPARSE_STATUS_SUCCESS);
    }


    #endif


    // ===============

    // cusparse matvec (__nv_fp8_e5m2)

    // ===============


    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)

    // Placeholder specialization for __nv_fp8_e5m2 data. It is not
    // implemented: none of the arguments are used and every call throws
    // std::runtime_error.
    template<>
    void cusparse_matvec<__nv_fp8_e5m2>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_fp8_e5m2 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_fp8_e5m2 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        // TODO: perform the sparse matrix-vector product for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ===============

    // cusparse matvec (__nv_fp8_e4m3)

    // ===============


    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)

    // Placeholder specialization for __nv_fp8_e4m3 data. It is not
    // implemented: none of the arguments are used and every call throws
    // std::runtime_error.
    template<>
    void cusparse_matvec<__nv_fp8_e4m3>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_fp8_e4m3 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_fp8_e4m3 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        // TODO: perform the sparse matrix-vector product for this data type.
        const char* const message = "Function not implemented.";
        throw std::runtime_error(message);
    }

    #endif


    // ===============

    // cusparse matvec (__half)

    // ===============


    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)

    // Specialization of cusparse_matvec for __half data. Forwards the
    // arguments to cusparseSpMV using the compute type CUDA_R_32F.
    //
    // Throws std::runtime_error if cuSPARSE does not return
    // CUSPARSE_STATUS_SUCCESS.
    template<>
    void cusparse_matvec<__half>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __half alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __half beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        // cuSPARSE reads the alpha and beta scalars as values of the compute
        // type, which is CUDA_R_32F (float) here. Passing the address of a
        // two-byte __half would make the library read four bytes from it, so
        // the scalars are widened to float first (the conversion is exact).
        const float alpha_compute = __half2float(alpha);
        const float beta_compute = __half2float(beta);

        const cusparseStatus_t status = cusparseSpMV(
                cusparse_handle, cusparse_operation, &alpha_compute,
                cusparse_matrix, cusparse_input_vector, &beta_compute,
                cusparse_output_vector, CUDA_R_32F, algorithm,
                external_buffer);

        // The status is checked explicitly instead of with assert(), since
        // assert() is removed when NDEBUG is defined and the failure would
        // then go unnoticed.
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
            throw std::runtime_error("cusparseSpMV failed.");
        }
    }

    #endif


    // ===============

    // cusparse matvec (__nv_bfloat16)

    // ===============


    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)

    // Specialization of cusparse_matvec for __nv_bfloat16 data. Forwards the
    // arguments to cusparseSpMV using the compute type CUDA_R_32F.
    //
    // Throws std::runtime_error if cuSPARSE does not return
    // CUSPARSE_STATUS_SUCCESS.
    template<>
    void cusparse_matvec<__nv_bfloat16>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const __nv_bfloat16 alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const __nv_bfloat16 beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        // cuSPARSE reads the alpha and beta scalars as values of the compute
        // type, which is CUDA_R_32F (float) here. Passing the address of a
        // two-byte __nv_bfloat16 would make the library read four bytes from
        // it, so the scalars are widened to float first (the conversion is
        // exact).
        const float alpha_compute = __bfloat162float(alpha);
        const float beta_compute = __bfloat162float(beta);

        const cusparseStatus_t status = cusparseSpMV(
                cusparse_handle, cusparse_operation, &alpha_compute,
                cusparse_matrix, cusparse_input_vector, &beta_compute,
                cusparse_output_vector, CUDA_R_32F, algorithm,
                external_buffer);

        // The status is checked explicitly instead of with assert(), since
        // assert() is removed when NDEBUG is defined and the failure would
        // then go unnoticed.
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
            throw std::runtime_error("cusparseSpMV failed.");
        }
    }

    #endif


    // ===============

    // cusparse matvec (float)

    // ===============


    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)

    // Specialization of cusparse_matvec for float data. Forwards the
    // arguments to cusparseSpMV using the compute type CUDA_R_32F. alpha and
    // beta are taken by value and passed to cuSPARSE by address.
    //
    // Throws std::runtime_error if cuSPARSE does not return
    // CUSPARSE_STATUS_SUCCESS.
    template<>
    void cusparse_matvec<float>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const float alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const float beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        const cusparseStatus_t status = cusparseSpMV(
                cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
                cusparse_input_vector, &beta, cusparse_output_vector,
                CUDA_R_32F, algorithm, external_buffer);

        // The status is checked explicitly instead of with assert(), since
        // assert() is removed when NDEBUG is defined and the failure would
        // then go unnoticed.
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
            throw std::runtime_error("cusparseSpMV failed.");
        }
    }


    #endif


    // ===============

    // cusparse matvec (double)

    // ===============


    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)

    // Specialization of cusparse_matvec for double data. Forwards the
    // arguments to cusparseSpMV using the compute type CUDA_R_64F. alpha and
    // beta are taken by value and passed to cuSPARSE by address.
    //
    // Throws std::runtime_error if cuSPARSE does not return
    // CUSPARSE_STATUS_SUCCESS.
    template<>
    void cusparse_matvec<double>(
            cusparseHandle_t cusparse_handle,
            cusparseOperation_t cusparse_operation,
            const double alpha,
            cusparseSpMatDescr_t cusparse_matrix,
            cusparseDnVecDescr_t cusparse_input_vector,
            const double beta,
            cusparseDnVecDescr_t cusparse_output_vector,
            cusparseSpMVAlg_t algorithm,
            void* external_buffer)
    {
        const cusparseStatus_t status = cusparseSpMV(
                cusparse_handle, cusparse_operation, &alpha, cusparse_matrix,
                cusparse_input_vector, &beta, cusparse_output_vector,
                CUDA_R_64F, algorithm, external_buffer);

        // The status is checked explicitly instead of with assert(), since
        // assert() is removed when NDEBUG is defined and the failure would
        // then go unnoticed.
        if (status != CUSPARSE_STATUS_SUCCESS)
        {
            throw std::runtime_error("cusparseSpMV failed.");
        }
    }


    #endif


}  // namespace cusparse_api


RESTRICT
#define RESTRICT
Definition c_matrix_operations.h:29

cusparse_api.h

cusparseSpMV
cusparseStatus_t cusparseSpMV(cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, void *externalBuffer)
Definition of CUDA's cusparseSpMV function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:330

cusparseCreateCsc
cusparseStatus_t cusparseCreateCsc(cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void *csrRowOffsets, void *csrColInd, void *csrValues, cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, cudaDataType valueType)
Definition of CUDA's cusparseCreateCsc function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:178

cusparseDestroySpMat
cusparseStatus_t cusparseDestroySpMat(cusparseConstSpMatDescr_t spMatDescr)
Definition of CUDA's cusparseDestroySpMat function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:244

cusparseCreateDnVec
cusparseStatus_t cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, void *values, cudaDataType valueType)
Definition of CUDA's cusparseCreateDnVec function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:215

cusparseSpMV_bufferSize
cusparseStatus_t cusparseSpMV_bufferSize(cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpMVAlg_t alg, size_t *bufferSize)
Definition of CUDA's cusparseSpMV_bufferSize function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:294

cusparseCreateCsr
cusparseStatus_t cusparseCreateCsr(cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, void *csrRowOffsets, void *csrColInd, void *csrValues, cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, cudaDataType valueType)
Definition of CUDA's cusparseCreateCsr function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:141

cusparseDestroyDnVec
cusparseStatus_t cusparseDestroyDnVec(cusparseConstDnVecDescr_t dnVecDescr)
Definition of CUDA's cusparseDestroyDnVec function using dynamically loaded cusparse library.
Definition cusparse_symbols.cpp:269

cusparse_api
A collection of templates that wrap cusparse functions.
Definition cusparse_api.cu:34

cusparse_api::create_cusparse_csc_matrix< double, int64_t >
void create_cusparse_csc_matrix< double, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, double *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the double precision data and int64_t index type.
Definition cusparse_api.cu:1248

cusparse_api::create_cusparse_csr_matrix< double, int32_t >
void create_cusparse_csr_matrix< double, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, double *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the double precision data and int32_t index type.
Definition cusparse_api.cu:579

cusparse_api::cusparse_matrix_buffer_size< float >
void cusparse_matrix_buffer_size< float >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const float alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const float beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)
A template wrapper for cusparseSpMV_bufferSize for float precision data. This function determ...
Definition cusparse_api.cu:1820

cusparse_api::cusparse_matvec< float >
void cusparse_matvec< float >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const float alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const float beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
A wrapper for cusparseSpMV to perform sparse matrix-vector multiplication using float precisi...
Definition cusparse_api.cu:2189

cusparse_api::create_cusparse_csc_matrix< float, int32_t >
void create_cusparse_csc_matrix< float, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, float *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the float precision data and int32_t index type.
Definition cusparse_api.cu:1089

cusparse_api::create_cusparse_csr_matrix< double, int64_t >
void create_cusparse_csr_matrix< double, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, double *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the double precision data and int64_t index type.
Definition cusparse_api.cu:632

cusparse_api::destroy_cusparse_matrix
void destroy_cusparse_matrix(cusparseSpMatDescr_t &cusparse_matrix)
Destroy cusparse matrix.
Definition cusparse_api.cu:1501

cusparse_api::create_cusparse_csc_matrix< double, int32_t >
void create_cusparse_csc_matrix< double, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, double *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the double precision data and int32_t index type.
Definition cusparse_api.cu:1195

cusparse_api::cusparse_matrix_buffer_size< double >
void cusparse_matrix_buffer_size< double >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const double alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const double beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, size_t *buffer_size)
A template wrapper for cusparseSpMV_bufferSize for double precision data. This function determines th...
Definition cusparse_api.cu:1880

cusparse_api::create_cusparse_csc_matrix< float, int64_t >
void create_cusparse_csc_matrix< float, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, float *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsc for the float precision data and int64_t index type.
Definition cusparse_api.cu:1142

cusparse_api::create_cusparse_vector< float >
void create_cusparse_vector< float >(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, float *RESTRICT device_vector)
A template wrapper for cusparseCreateDnVec for the float precision data.
Definition cusparse_api.cu:1436

cusparse_api::cusparse_matvec< double >
void cusparse_matvec< double >(cusparseHandle_t cusparse_handle, cusparseOperation_t cusparse_operation, const double alpha, cusparseSpMatDescr_t cusparse_matrix, cusparseDnVecDescr_t cusparse_input_vector, const double beta, cusparseDnVecDescr_t cusparse_output_vector, cusparseSpMVAlg_t algorithm, void *external_buffer)
A wrapper for cusparseSpMV to perform sparse matrix-vector multiplication using double precision data...
Definition cusparse_api.cu:2251

cusparse_api::create_cusparse_csr_matrix< float, int32_t >
void create_cusparse_csr_matrix< float, int32_t >(cusparseSpMatDescr_t &cusparse_matrix, const int32_t num_rows, const int32_t num_columns, const int32_t nnz, float *RESTRICT device_A_data, int32_t *RESTRICT device_A_indices, int32_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the float precision data and int32_t index type.
Definition cusparse_api.cu:473

cusparse_api::destroy_cusparse_vector
void destroy_cusparse_vector(cusparseDnVecDescr_t &cusparse_vector)
Destroys cusparse vector.
Definition cusparse_api.cu:1523

cusparse_api::create_cusparse_vector< double >
void create_cusparse_vector< double >(cusparseDnVecDescr_t &cusparse_vector, const LongIndexType vector_size, double *RESTRICT device_vector)
A template wrapper for cusparseCreateDnVec for the double precision data.
Definition cusparse_api.cu:1473

cusparse_api::create_cusparse_csr_matrix< float, int64_t >
void create_cusparse_csr_matrix< float, int64_t >(cusparseSpMatDescr_t &cusparse_matrix, const int64_t num_rows, const int64_t num_columns, const int64_t nnz, float *RESTRICT device_A_data, int64_t *RESTRICT device_A_indices, int64_t *RESTRICT device_A_index_pointer)
A template wrapper for cusparseCreateCsr for the float precision data and int64_t index type.
Definition cusparse_api.cu:526

__nv_fp8_e4m3
Definition cu_types.h:34

__nv_fp8_e5m2
Definition cu_types.h:27

LongIndexType
int LongIndexType
Definition types.h:60