imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cu_vector_operations.cu
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11
12// =======
13// Headers
14// =======
15
17#include <cmath> // sqrt
18#include <cassert> // assert
19#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,
20 // __half, __nv_bfloat16
21#include "../_cu_arithmetics/cu_arithmetics.h" // cu_arithmetics
22#include "./cublas_api.h" // cublas_api
23
24
25// ===========
26// copy vector
27// ===========
28
39
// Copies `vector_size` elements of `input_vector` into `output_vector` by a
// single call to cublas_api::cublasXcopy, using a stride of 1 for both
// vectors.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded unchanged to cublasXcopy.
//   input_vector   Source array (read only).
//   vector_size    Number of elements to copy.
//   output_vector  Destination array (overwritten).
//
// NOTE(review): the line holding the return type and the qualified function
// name (listing line 41) is missing from this extract; presumably it reads
// `void cuVectorOperations<DataType>::copy_vector(` -- confirm against the
// original file.
// NOTE(review): both arrays are presumably device pointers, as cuBLAS
// level-1 routines expect -- confirm against callers.
40template <typename DataType>
42 cublasHandle_t cublas_handle,
43 const DataType* RESTRICT input_vector,
44 const LongIndexType vector_size,
45 DataType* RESTRICT output_vector)
46{
47 int incx = 1;
48 int incy = 1;
49
50 cublasStatus_t status = cublas_api::cublasXcopy(
51 cublas_handle, vector_size, input_vector, incx, output_vector,
52 incy);
53
// NOTE(review): assert() expands to nothing when NDEBUG is defined, so in
// such builds a failing cuBLAS status is not reported by this function.
54 assert(status == CUBLAS_STATUS_SUCCESS);
55}
56
57// ==================
58// copy scaled vector
59// ==================
60
74
// Writes `scale * input_vector` into `output_vector` in two cuBLAS calls:
// first cublasXcopy copies the input into the output, then cublasXscal
// multiplies the output by `scale`. Both calls use a stride of 1.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to both cuBLAS calls.
//   input_vector   Source array (read only).
//   vector_size    Number of elements processed.
//   scale          Scalar multiplier; its address is passed to cublasXscal.
//   output_vector  Destination array (overwritten).
//
// NOTE(review): the line holding the return type and the qualified function
// name (listing line 76) is missing from this extract; presumably it reads
// `void cuVectorOperations<DataType>::copy_scaled_vector(` -- confirm.
// NOTE(review): `&scale` is the address of a by-value parameter, i.e. host
// memory; this presumably relies on the handle being in host pointer mode --
// confirm.
75template <typename DataType>
77 cublasHandle_t cublas_handle,
78 const DataType* RESTRICT input_vector,
79 const LongIndexType vector_size,
80 const DataType scale,
81 DataType* RESTRICT output_vector)
82{
83 cublasStatus_t status;
84 int incx = 1;
85 int incy = 1;
86
87 // Copy input to output vector
88 status = cublas_api::cublasXcopy(cublas_handle, vector_size, input_vector,
89 incx, output_vector, incy);
90
// NOTE(review): assert() is a no-op when NDEBUG is defined, so in such
// builds a failed copy is not detected before the scaling step runs.
91 assert(status == CUBLAS_STATUS_SUCCESS);
92
93 // Scale output vector
94 status = cublas_api::cublasXscal(cublas_handle, vector_size, &scale,
95 output_vector, incy);
96
97 assert(status == CUBLAS_STATUS_SUCCESS);
98}
99
100
101// ======================
102// subtract scaled vector
103// ======================
104
126
// Subtracts `scale * input_vector` from `output_vector`, in place on
// `output_vector`. This is done by negating `scale` and passing it as the
// alpha coefficient of cublas_api::cublasXaxpy with unit strides.
//
// If cu_arithmetics::is_equal(scale, 0) reports equality, the function
// returns immediately and `output_vector` is left untouched. Per the
// is_equal summary on this page the comparison is made within a tolerance,
// so scales sufficiently close to zero are also skipped.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to cublasXaxpy.
//   input_vector   Array that is scaled and subtracted (read only).
//   vector_size    Number of elements processed.
//   scale          Scalar multiplier applied to `input_vector`.
//   output_vector  Array that is updated in place.
//
// NOTE(review): the line holding the return type and the qualified function
// name (listing line 128) is missing from this extract; presumably it reads
// `void cuVectorOperations<DataType>::subtract_scaled_vector(` -- confirm.
127template <typename DataType>
129 cublasHandle_t cublas_handle,
130 const DataType* RESTRICT input_vector,
131 const LongIndexType vector_size,
132 const DataType scale,
133 DataType* RESTRICT output_vector)
134{
// NOTE(review): `0.0` is a double literal converted to DataType; this
// presumably relies on DataType being constructible from double for the
// reduced-precision instantiations -- confirm.
135 DataType zero = 0.0;
136 if (cu_arithmetics::is_equal(scale, zero))
137 {
138 return;
139 }
140
141 int incx = 1;
142 int incy = 1;
143
// The coefficient is negated so that the axpy call performs a subtraction.
144 DataType neg_scale = -scale;
145 cublasStatus_t status = cublas_api::cublasXaxpy(
146 cublas_handle, vector_size, &neg_scale, input_vector, incx,
147 output_vector, incy);
148
// NOTE(review): assert() is a no-op when NDEBUG is defined, so in such
// builds a failing cuBLAS status is not reported by this function.
149 assert(status == CUBLAS_STATUS_SUCCESS);
150}
151
152
153// =============
154// inner product
155// =============
156
167
// Returns the inner product of `vector1` and `vector2` over `vector_size`
// elements, computed by cublas_api::cublasXdot with unit strides. cuBLAS
// writes the result into the local variable `inner_prod`, which is then
// returned by value.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to cublasXdot.
//   vector1        First operand (read only).
//   vector2        Second operand (read only).
//   vector_size    Number of elements in each operand.
//
// NOTE(review): the line holding the return type and the qualified function
// name (listing line 169) is missing from this extract; presumably it reads
// `DataType cuVectorOperations<DataType>::inner_product(` -- confirm.
// NOTE(review): the result pointer refers to a host local, so this
// presumably relies on the handle being in host pointer mode -- confirm.
168template <typename DataType>
170 cublasHandle_t cublas_handle,
171 const DataType* RESTRICT vector1,
172 const DataType* RESTRICT vector2,
173 const LongIndexType vector_size)
174{
175 DataType inner_prod;
176 int incx = 1;
177 int incy = 1;
178
179 cublasStatus_t status = cublas_api::cublasXdot(
180 cublas_handle, vector_size, vector1, incx, vector2, incy,
181 &inner_prod);
182
// NOTE(review): assert() is a no-op when NDEBUG is defined; if the call
// failed in such a build, `inner_prod` may be returned without having been
// written.
183 assert(status == CUBLAS_STATUS_SUCCESS);
184
185 return inner_prod;
186}
187
188
189// ==============
190// euclidean norm
191// ==============
192
202
// Returns the Euclidean 2-norm of `vector` over `vector_size` elements,
// computed by cublas_api::cublasXnrm2 with a stride of 1. cuBLAS writes the
// result into the local variable `norm`, which is then returned by value.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to cublasXnrm2.
//   vector         Input array (read only).
//   vector_size    Number of elements in `vector`.
//
// NOTE(review): the line holding the return type and the qualified function
// name (listing line 204) is missing from this extract; presumably it reads
// `DataType cuVectorOperations<DataType>::euclidean_norm(` -- confirm.
// NOTE(review): the result pointer refers to a host local, so this
// presumably relies on the handle being in host pointer mode -- confirm.
203template <typename DataType>
205 cublasHandle_t cublas_handle,
206 const DataType* RESTRICT vector,
207 const LongIndexType vector_size)
208{
209 DataType norm;
210 int incx = 1;
211
212 cublasStatus_t status = cublas_api::cublasXnrm2(
213 cublas_handle, vector_size, vector, incx, &norm);
214
// NOTE(review): assert() is a no-op when NDEBUG is defined; if the call
// failed in such a build, `norm` may be returned without having been written.
215 assert(status == CUBLAS_STATUS_SUCCESS);
216
217 return norm;
218}
219
220
221// =========================
222// normalize vector in place
223// =========================
224
235
// Normalizes `vector` in place and returns the norm it had before scaling.
// A norm is obtained first, a scale factor is derived from it with
// cu_arithmetics::div, and cublas_api::cublasXscal multiplies `vector` by
// that factor with a stride of 1.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to the norm and scal calls.
//   vector         Array that is read and then overwritten.
//   vector_size    Number of elements in `vector`.
//
// NOTE(review): listing lines 237, 243 and 248 are missing from this
// extract. Presumably 237 is the qualified function name, 243 is
// `DataType norm = cuVectorOperations<DataType>::euclidean_norm(`, and 248
// is the numerator of the division (a value of one) -- confirm against the
// original file.
// NOTE(review): no guard for a zero norm is visible here; the behaviour for
// an all-zero vector depends on cu_arithmetics::div -- confirm.
236template <typename DataType>
238 cublasHandle_t cublas_handle,
239 DataType* RESTRICT vector,
240 const LongIndexType vector_size)
241{
242 // Norm of vector
244 cublas_handle, vector, vector_size);
245
246 // Normalize in place
247 DataType scale = cu_arithmetics::div(
249 norm);
250 int incx = 1;
251 cublasStatus_t status = cublas_api::cublasXscal(
252 cublas_handle, vector_size, &scale, vector, incx);
253
// NOTE(review): assert() is a no-op when NDEBUG is defined, so in such
// builds a failing cuBLAS status is not reported by this function.
254 assert(status == CUBLAS_STATUS_SUCCESS);
255
256 return norm;
257}
258
259
260// =========================
261// normalize vector and copy
262// =========================
263
276
// Writes a normalized copy of `vector` into `output_vector` and returns the
// norm of `vector`. A norm is obtained first, a scale factor is derived
// from it with cu_arithmetics::div, and a further call receives
// `vector_size`, `scale` and `output_vector` to produce the scaled copy.
//
// Parameters (as visible in this listing):
//   cublas_handle  cuBLAS handle forwarded to the norm computation.
//   vector         Input array (read only).
//   vector_size    Number of elements in `vector`.
//   output_vector  Destination array for the normalized result.
//
// NOTE(review): listing lines 278, 285, 290 and 292 are missing from this
// extract. Presumably 278 is the qualified function name, 285 is
// `DataType norm = cuVectorOperations<DataType>::euclidean_norm(`, 290 is
// the numerator of the division (a value of one), and 292 is the start of a
// call to copy_scaled_vector -- confirm against the original file.
// NOTE(review): no guard for a zero norm is visible here; the behaviour for
// an all-zero vector depends on cu_arithmetics::div -- confirm.
277template <typename DataType>
279 cublasHandle_t cublas_handle,
280 const DataType* RESTRICT vector,
281 const LongIndexType vector_size,
282 DataType* RESTRICT output_vector)
283{
284 // Norm of vector
286 cublas_handle, vector, vector_size);
287
288 // Normalize to output
289 DataType scale = cu_arithmetics::div(
291 norm);
293 vector_size, scale,
294 output_vector);
295
296 return norm;
297}
298
299
300// ===============================
301// Explicit template instantiation
302// ===============================
303
// Explicit instantiations of cuVectorOperations, one per supported element
// type. Each instantiation is compiled only when the corresponding
// USE_CUDA_* macro is defined and equal to 1.
//
// NOTE(review): the bodies of the FP8_E5M2, FP8_E4M3 and BF16 guards
// (listing lines 305, 309 and 317) are missing from this extract; presumably
// they instantiate the class for __nv_fp8_e5m2, __nv_fp8_e4m3 and
// __nv_bfloat16 respectively -- confirm against the original file.
304#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
306#endif
307
308#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
310#endif
311
312#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
313 template class cuVectorOperations<__half>;
314#endif
315
316#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
318#endif
319
320#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
321 template class cuVectorOperations<float>;
322#endif
323
324#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
325 template class cuVectorOperations<double>;
326#endif
#define RESTRICT
A static class for vector operations, similar to level-1 operations of the BLAS library....
static DataType normalize_vector_in_place(cublasHandle_t cublas_handle, DataType *RESTRICT vector, const LongIndexType vector_size)
Normalizes a vector based on Euclidean 2-norm. The result is written in-place.
static void copy_scaled_vector(cublasHandle_t cublas_handle, const DataType *RESTRICT input_vector, const LongIndexType vector_size, const DataType scale, DataType *RESTRICT output_vector)
Scales a vector and stores to a new vector.
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *RESTRICT input_vector, const LongIndexType vector_size, const DataType scale, DataType *RESTRICT output_vector)
Subtracts the scaled input vector from the output vector.
static DataType normalize_vector_and_copy(cublasHandle_t cublas_handle, const DataType *RESTRICT vector, const LongIndexType vector_size, DataType *RESTRICT output_vector)
Normalizes a vector based on Euclidean 2-norm. The result is written into another vector.
static void copy_vector(cublasHandle_t cublas_handle, const DataType *RESTRICT input_vector, const LongIndexType vector_size, DataType *RESTRICT output_vector)
Copies a vector to a new vector. The result is written to the output vector; the input vector is not modified.
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *RESTRICT vector1, const DataType *RESTRICT vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *RESTRICT vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.
__host__ __device__ DataType div(const DataType x, const DataType y)
Divide two floating point numbers in round-to-nearest-even mode.
bool is_equal(DataType x, DataType y)
Check if two floating point numbers are equal within a tolerance.
cublasStatus_t cublasXaxpy(cublasHandle_t handle, int n, const DataType *RESTRICT alpha, const DataType *RESTRICT x, int incx, DataType *RESTRICT y, int incy)
cublasStatus_t cublasXnrm2(cublasHandle_t handle, int n, const DataType *RESTRICT x, int incx, DataType *RESTRICT result)
cublasStatus_t cublasXscal(cublasHandle_t handle, int n, const DataType *RESTRICT alpha, DataType *RESTRICT x, int incx)
cublasStatus_t cublasXcopy(cublasHandle_t handle, int n, const DataType *RESTRICT x, int incx, DataType *RESTRICT y, int incy)
cublasStatus_t cublasXdot(cublasHandle_t handle, int n, const DataType *RESTRICT x, int incx, const DataType *RESTRICT y, int incy, DataType *RESTRICT result)
int LongIndexType
Definition types.h:60