imate
C++/CUDA Reference
Loading...
Searching...
No Matches
cuda_api.cu
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11
12// =======
13// Headers
14// =======
15
16#include "./cuda_api.h"
17#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,
18 // __half, __nv_bfloat16
19#include <cassert> // assert
20#include <iostream> // std::cerr
21#include <cstdlib> // abort
22#include <limits> // std::numeric_limits
23#include "../_definitions/types.h" // LongIndexType
24
25
26// =======
27// alloc 1
28// =======
29
37
38template <typename ArrayType>
39ArrayType* CudaAPI<ArrayType>::alloc(const size_t array_size)
40{
41 // Check if overflowing might make array_size negative if array_size is
42 // a signed type. For unsigned type, we have no clue at this point.
43 assert(array_size > 0);
44
45 // Check if computing num_bytes will not overflow size_t (unsigned int)
46 size_t max_index = std::numeric_limits<size_t>::max();
47 if (max_index / sizeof(ArrayType) < array_size)
48 {
49 std::cerr << "The size of array in bytes exceeds the maximum " \
50 << "integer limit, which is: " << max_index << ". The " \
51 << "array size is: " << array_size << ", and the size of " \
52 << "data type is: " << sizeof(ArrayType) << "-bytes." \
53 << std::endl;
54 abort();
55 }
56
57 ArrayType* device_array;
58 size_t num_bytes = array_size * sizeof(ArrayType);
59 cudaError_t error = cudaMalloc(&device_array, num_bytes);
60 assert(error == cudaSuccess);
61
62 return device_array;
63}
64
65
66// =======
67// alloc 2
68// =======
69
77
78template <typename ArrayType>
80 ArrayType*& device_array,
81 const size_t array_size)
82{
83 // Check if overflowing might make array_size negative if array_size is
84 // a signed type. For unsigned type, we have no clue at this point.
85 assert(array_size > 0);
86
87 // Check if computing num_bytes will not overflow size_t (unsigned int)
88 size_t max_index = std::numeric_limits<size_t>::max();
89 if (max_index / sizeof(ArrayType) < array_size)
90 {
91 std::cerr << "The size of array in bytes exceeds the maximum " \
92 << "integer limit, which is: " << max_index << ". The " \
93 << "array size is: " << array_size << ", and the size of " \
94 << "data type is: " << sizeof(ArrayType) << "-bytes." \
95 << std::endl;
96 abort();
97 }
98
99 size_t num_bytes = array_size * sizeof(ArrayType);
100 cudaError_t error = cudaMalloc(&device_array, num_bytes);
101 assert(error == cudaSuccess);
102}
103
104
105// ===========
106// alloc bytes
107// ===========
108
116
117template <typename ArrayType>
119 void*& device_array,
120 const size_t num_bytes)
121{
122 // Check if overflowing might make num_bytes negative if size_t is
123 // a signed type. For unsigned type, we have no clue at this point.
124 assert(num_bytes > 0);
125
126 cudaError_t error = cudaMalloc(&device_array, num_bytes);
127 assert(error == cudaSuccess);
128}
129
130
131// ==============
132// copy to device
133// ==============
134
143
144template <typename ArrayType>
146 const ArrayType* host_array,
147 const size_t array_size,
148 ArrayType* device_array)
149{
150 size_t num_bytes = array_size * sizeof(ArrayType);
151 cudaError_t error = cudaMemcpy(device_array, host_array, num_bytes,
152 cudaMemcpyHostToDevice);
153 assert(error == cudaSuccess);
154}
155
156
157// ===
158// del
159// ===
160
167
168template <typename ArrayType>
169void CudaAPI<ArrayType>::del(void* device_array)
170{
171 if (device_array != NULL)
172 {
173 cudaError_t error = cudaFree(device_array);
174 assert(error == cudaSuccess);
175 device_array = NULL;
176 }
177}
178
179
180// ==========
181// set device
182// ==========
183
189
190template<typename ArrayType>
192{
193 cudaError_t error = cudaSetDevice(device_id);
194 assert(error == cudaSuccess);
195}
196
197
198// ==========
199// get device
200// ==========
201
207
208template<typename ArrayType>
210{
211 int device_id = -1;
212 cudaError_t error = cudaGetDevice(&device_id);
213 assert(error == cudaSuccess);
214
215 return device_id;
216}
217
218
219// ===============================
220// Explicit template instantiation
221// ===============================
222
// The member functions of CudaAPI are defined in this file, so the class is
// explicitly instantiated here for each supported element type.
// The instantiation for LongIndexType is unconditional.
223template class CudaAPI<LongIndexType>;
224
// Each instantiation below is compiled only when its USE_CUDA_* macro is
// defined and equal to 1.
225#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
226 template class CudaAPI<__nv_fp8_e5m2>;
227#endif
228
229#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
230 template class CudaAPI<__nv_fp8_e4m3>;
231#endif
232
233#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
234 template class CudaAPI<__half>;
235#endif
236
237#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
238 template class CudaAPI<__nv_bfloat16>;
239#endif
240
241#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
242 template class CudaAPI<float>;
243#endif
244
245#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
246 template class CudaAPI<double>;
247#endif
An interface to CUDA library to facilitate working with CUDA, such as memory allocation,...
Definition cuda_api.h:36
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Definition cuda_api.cu:191
static ArrayType * alloc(const size_t array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
Definition cuda_api.cu:39
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL. The pointer is passed by value, so the caller's pointer is not set to NULL by this function.
Definition cuda_api.cu:169
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
Definition cuda_api.cu:118
static int get_device()
Gets the current device in multi-gpu applications.
Definition cuda_api.cu:209
static void copy_to_device(const ArrayType *host_array, const size_t array_size, ArrayType *device_array)
Copies memory on host to device memory.
Definition cuda_api.cu:145
cudaError_t cudaGetDevice(int *device)
Definition of CUDA's cudaGetDevice function using dynamically loaded cudart library.
cudaError_t cudaSetDevice(int device)
Definition of CUDA's cudaSetDevice function using dynamically loaded cudart library.
cudaError_t cudaFree(void *devPtr)
Definition of CUDA's cudaFree function using dynamically loaded cudart library.
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, cudaMemcpyKind kind)
Definition of CUDA's cudaMemcpy function using dynamically loaded cudart library.
cudaError_t cudaMalloc(void **devPtr, size_t size)
Definition of CUDA's cudaMalloc function using dynamically loaded cudart library.