11#ifndef _CU_BASIC_ALGEBRA_CUBLAS_IMPL_KERNELS_H_
12#define _CU_BASIC_ALGEBRA_CUBLAS_IMPL_KERNELS_H_
20 #define RESTRICT __restrict
21#elif defined(__INTEL_COMPILER)
22 #define RESTRICT __restrict
23#elif defined(__CUDA__) || defined(__GNUC__) || defined(__clang__)
24 #define RESTRICT __restrict__
75 typename DataType,
typename ComputeType,
unsigned int block_size>
90 template <
typename DataType>
99 template <
typename DataType>
102 const DataType alpha,
110 typename DataType,
typename ComputeType,
unsigned int block_size>
121 typename DataType,
typename ComputeType,
unsigned int block_size>
129 template <
typename DataType>
132 const DataType alpha,
Templated kernel code for implementations of several BLAS-type functions in CUDA.
__global__ void cublasTscal_kernel(const int n, const DataType alpha, DataType *RESTRICT x, const int incx)
Performs $x \leftarrow \alpha x$.
__global__ void cublasTaxpy_kernel(const int n, const DataType alpha, const DataType *RESTRICT x, const int incx, DataType *RESTRICT y, const int incy)
Performs $y \leftarrow \alpha x + y$.
__global__ void cublasTnrm2_kernel(const int n, const DataType *RESTRICT x, const int incx, ComputeType *RESTRICT result)
Computes the Euclidean norm $\|x\|_2$.
__global__ void cublasTcopy_kernel(const int n, const DataType *RESTRICT x, const int incx, DataType *RESTRICT y, const int incy)
Performs $y \leftarrow x$.
__global__ void cublasTdot_kernel(const int n, const DataType *RESTRICT x, const int incx, const DataType *RESTRICT y, const int incy, ComputeType *RESTRICT result)
Computes the dot product $x \cdot y$.
__global__ void cublasTgemv_kernel(const bool trans, const int m, const int n, const DataType alpha, const DataType *RESTRICT A, const int lda, const DataType *RESTRICT x, const int incx, const DataType beta, DataType *RESTRICT y, const int incy)
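Performs the operation $y \leftarrow \alpha\,\mathrm{op}(A)\,x + \beta y$, where $\mathrm{op}(A)$ is $A$ or $A^{T}$ depending on `trans`.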