imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cublas_impl_kernels.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11#ifndef _CU_BASIC_ALGEBRA_CUBLAS_IMPL_KERNELS_H_
12#define _CU_BASIC_ALGEBRA_CUBLAS_IMPL_KERNELS_H_
13
14// =======
15// Headers
16// =======
17
18// Restrict qualifier
// RESTRICT expands to the compiler-specific "no aliasing" pointer qualifier so
// that the kernel declarations below can use a single spelling:
//   - MSVC (_MSC_VER) and the Intel compiler (__INTEL_COMPILER): __restrict
//   - when __CUDA__, __GNUC__ or __clang__ is defined:            __restrict__
//   - any other compiler: empty, i.e. the pointers carry no qualifier at all.
// The branches are tested in this order, so the first matching one wins.
// NOTE(review): nvcc is documented to define __CUDACC__ / __NVCC__ rather than
// __CUDA__; the third branch is presumably still reached through the host
// compiler's __GNUC__ or __clang__ macro -- confirm for the supported toolchains.
// NOTE(review): RESTRICT is defined without an #ifndef/#undef guard; presumably
// no other included header defines the same name -- confirm.
19#if defined(_MSC_VER)
20 #define RESTRICT __restrict
21#elif defined(__INTEL_COMPILER)
22 #define RESTRICT __restrict
23#elif defined(__CUDA__) || defined(__GNUC__) || defined(__clang__)
24 #define RESTRICT __restrict__
25#else
26 #define RESTRICT
27#endif
28
29
30// ==================
31// cublas impl kernel
32// ==================
33
70
71namespace cublas_impl_kernels
72{
// Declarations of templated __global__ kernels named after BLAS-style
// routines. Only the signatures appear here; no kernel body is part of this
// header listing. Every pointer parameter is RESTRICT-qualified, so wherever
// that macro expands to a restrict keyword the caller promises that the
// buffers passed to one launch do not alias each other.
// NOTE(review): the operation each kernel performs is inferred from its name
// and parameter list only -- confirm against the kernel definitions.

73 // cublasTgemv kernel
// Template parameters: DataType (type of the matrix, vectors and scalars),
// ComputeType (a second arithmetic type; presumably used for accumulation --
// confirm) and block_size (compile-time unsigned value; presumably must match
// the launch block size -- confirm).
// Parameters: trans flag, sizes m and n, scalar alpha, read-only matrix A with
// lda, read-only vector x with incx, scalar beta, writable vector y with incy.
// NOTE(review): names follow the BLAS gemv convention
// (y <- alpha * op(A) * x + beta * y, lda = leading dimension) -- confirm.
74 template <
75 typename DataType, typename ComputeType, unsigned int block_size>
76 __global__ void cublasTgemv_kernel(
77 const bool trans,
78 const int m,
79 const int n,
80 const DataType alpha,
81 const DataType* RESTRICT A,
82 const int lda,
83 const DataType* RESTRICT x,
84 const int incx,
85 const DataType beta,
86 DataType* RESTRICT y,
87 const int incy);
88
89 // cublasTcopy kernel
// Parameters: element count n, read-only vector x with increment incx,
// writable vector y with increment incy.
// NOTE(review): presumably copies x into y as in BLAS copy -- confirm.
90 template <typename DataType>
91 __global__ void cublasTcopy_kernel(
92 const int n,
93 const DataType* RESTRICT x,
94 const int incx,
95 DataType* RESTRICT y,
96 const int incy);
97
98 // cublasTaxpy kernel
// Parameters: element count n, scalar alpha, read-only vector x with increment
// incx, writable vector y with increment incy.
// NOTE(review): presumably y <- alpha * x + y as in BLAS axpy -- confirm.
99 template <typename DataType>
100 __global__ void cublasTaxpy_kernel(
101 const int n,
102 const DataType alpha,
103 const DataType* RESTRICT x,
104 const int incx,
105 DataType* RESTRICT y,
106 const int incy);
107
108 // cublasTdot kernel
// Template parameters: DataType for the inputs, ComputeType for the output,
// block_size as a compile-time unsigned value.
// Parameters: element count n, two read-only vectors x and y with increments
// incx and incy, and a writable `result` pointer of ComputeType (not DataType).
// NOTE(review): presumably the inner product of x and y; whether `result`
// holds one value or per-block partial values is not visible here -- confirm.
109 template <
110 typename DataType, typename ComputeType, unsigned int block_size>
111 __global__ void cublasTdot_kernel(
112 const int n,
113 const DataType* RESTRICT x,
114 const int incx,
115 const DataType* RESTRICT y,
116 const int incy,
117 ComputeType* RESTRICT result);
118
119 // cublasTnrm2 kernel
// Template parameters: DataType for the input, ComputeType for the output,
// block_size as a compile-time unsigned value.
// Parameters: element count n, read-only vector x with increment incx, and a
// writable `result` pointer of ComputeType.
// NOTE(review): presumably related to the Euclidean norm of x; whether the
// square root is applied inside the kernel is not visible here -- confirm.
120 template <
121 typename DataType, typename ComputeType, unsigned int block_size>
122 __global__ void cublasTnrm2_kernel(
123 const int n,
124 const DataType* RESTRICT x,
125 const int incx,
126 ComputeType* RESTRICT result);
127
128 // cublasTscal kernel
// Parameters: element count n, scalar alpha, writable vector x with increment
// incx. x is the only pointer and is non-const, so it is updated in place.
// NOTE(review): presumably x <- alpha * x as in BLAS scal -- confirm.
129 template <typename DataType>
130 __global__ void cublasTscal_kernel(
131 const int n,
132 const DataType alpha,
133 DataType* RESTRICT x,
134 const int incx);
135}
136
137#endif // _CU_BASIC_ALGEBRA_CUBLAS_IMPL_KERNELS_H_
#define RESTRICT
Templated kernel code for implementations of several BLAS-type functions in CUDA.
__global__ void cublasTscal_kernel(const int n, const DataType alpha, DataType *RESTRICT x, const int incx)
Performs $\boldsymbol{x} \leftarrow \alpha \boldsymbol{x}$.
__global__ void cublasTaxpy_kernel(const int n, const DataType alpha, const DataType *RESTRICT x, const int incx, DataType *RESTRICT y, const int incy)
Performs $\boldsymbol{y} \leftarrow \alpha \boldsymbol{x} + \boldsymbol{y}$.
__global__ void cublasTnrm2_kernel(const int n, const DataType *RESTRICT x, const int incx, ComputeType *RESTRICT result)
Computes $\| \boldsymbol{x} \|_2$.
__global__ void cublasTcopy_kernel(const int n, const DataType *RESTRICT x, const int incx, DataType *RESTRICT y, const int incy)
Performs $\boldsymbol{y} \leftarrow \boldsymbol{x}$.
__global__ void cublasTdot_kernel(const int n, const DataType *RESTRICT x, const int incx, const DataType *RESTRICT y, const int incy, ComputeType *RESTRICT result)
Computes $\boldsymbol{x}^{\intercal} \boldsymbol{y}$.
__global__ void cublasTgemv_kernel(const bool trans, const int m, const int n, const DataType alpha, const DataType *RESTRICT A, const int lda, const DataType *RESTRICT x, const int incx, const DataType beta, DataType *RESTRICT y, const int incy)
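Performs the operation $\boldsymbol{y} \leftarrow \alpha \, \mathrm{op}(\mathbf{A}) \, \boldsymbol{x} + \beta \boldsymbol{y}$, where $\mathrm{op}(\mathbf{A})$ is $\mathbf{A}$ or $\mathbf{A}^{\intercal}$ depending on `trans`.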