imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cublas_impl.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11#ifndef _CU_BASIC_ALGEBRA_CUBLAS_IMPL_H_
12#define _CU_BASIC_ALGEBRA_CUBLAS_IMPL_H_
13
14// =======
15// Headers
16// =======
17
18// Avoid CUBLAS numeration value not handled in switch [-Wswitch-enum] warning
19#ifdef _MSC_VER
20 #pragma warning(push, 0) // Suppress all warnings from the followings
21 #include <cublas_v2.h>
22 #pragma warning(pop) // Restore previous warning level
23#elif defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_COMPILER)
24 #pragma warning(push, 0)
25 #include <cublas_v2.h>
26 #pragma warning(pop)
27#elif defined(__GNUC__) || defined(__clang__)
28 #pragma GCC diagnostic push
29 #pragma GCC diagnostic ignored "-Wswitch-enum"
30 #include <cublas_v2.h>
31 #pragma GCC diagnostic pop
32#else
33 #include <cublas_v2.h> // cublasOperation_t, CUBLAS_OP_N, CUBLAS_OP_T,
34 // cudaError_t
35#endif
36
37// Restrict qualifier
38// Note: generally, the restrict qualifier is only needed to be be added to the
39// function signature in the implementation (*.cu files) and not the
40// declarations (in *h files). However, CUDA seems to have a bug that the
41// restrict qualifier should be added to BOTH declaration and implementation,
42// otherwise, it corrupts the array pointers.
43#if defined(_MSC_VER)
44 #define RESTRICT __restrict
45#elif defined(__INTEL_COMPILER)
46 #define RESTRICT __restrict
47#elif defined(__CUDA__) || defined(__GNUC__) || defined(__clang__)
48 #define RESTRICT __restrict__
49#else
50 #define RESTRICT
51#endif
52
53
54// ===========
55// cublas impl
56// ===========
57
94
95namespace cublas_impl
96{
97 // cublasTgemv
98 template <typename DataType, typename ComputeType>
99 cudaError_t cublasTgemv(
100 cublasOperation_t trans,
101 int m,
102 int n,
103 const DataType* RESTRICT alpha,
104 const DataType* RESTRICT A,
105 int lda,
106 const DataType* RESTRICT x,
107 int incx,
108 const DataType* RESTRICT beta,
109 DataType* RESTRICT y,
110 int incy);
111
112 // cublasTcopy
113 template <typename DataType>
114 cudaError_t cublasTcopy(
115 int n,
116 const DataType* RESTRICT x,
117 int incx,
118 DataType* RESTRICT y,
119 int incy);
120
121 // cublasTaxpy
122 template <typename DataType>
123 cudaError_t cublasTaxpy(
124 int n,
125 const DataType* RESTRICT alpha,
126 const DataType* RESTRICT x,
127 int incx,
128 DataType* RESTRICT y,
129 int incy);
130
131 // cublasTdot
132 template <typename DataType, typename ComputeType>
133 cudaError_t cublasTdot(
134 int n,
135 const DataType* RESTRICT x,
136 int incx,
137 const DataType* RESTRICT y,
138 int incy,
139 DataType* RESTRICT result);
140
141 // cublasTnrm2
142 template <typename DataType, typename ComputeType>
143 cudaError_t cublasTnrm2(
144 int n,
145 const DataType* RESTRICT x,
146 int incx,
147 DataType* RESTRICT result);
148
149 // cublasTscal
150 template <typename DataType>
151 cudaError_t cublasTscal(
152 int n,
153 const DataType* RESTRICT alpha,
154 DataType* RESTRICT x,
155 int incx);
156}
157
158#endif // _CU_BASIC_ALGEBRA_CUBLAS_IMPL_H_
#define RESTRICT
Templated implenentations of several BLAS-type functions in CUDA.
cudaError_t cublasTaxpy(int n, const DataType *RESTRICT alpha, const DataType *RESTRICT x, int incx, DataType *RESTRICT y, int incy)
Performs .
cudaError_t cublasTgemv(cublasOperation_t trans, int m, int n, const DataType *RESTRICT alpha, const DataType *RESTRICT A, int lda, const DataType *RESTRICT x, int incx, const DataType *RESTRICT beta, DataType *RESTRICT y, int incy)
Performs .
cudaError_t cublasTdot(int n, const DataType *RESTRICT x, int incx, const DataType *RESTRICT y, int incy, DataType *RESTRICT result)
Computes .
cudaError_t cublasTcopy(int n, const DataType *RESTRICT x, int incx, DataType *RESTRICT y, int incy)
Performs .
cudaError_t cublasTnrm2(int n, const DataType *RESTRICT x, int incx, DataType *RESTRICT result)
Computes .
cudaError_t cublasTscal(int n, const DataType *RESTRICT alpha, DataType *RESTRICT x, int incx)
Performs .