17#include "../_cu_definitions/cu_types.h"
23#include "../_definitions/types.h"
38template <
typename ArrayType>
43 assert(array_size > 0);
46 size_t max_index = std::numeric_limits<size_t>::max();
47 if (max_index /
sizeof(ArrayType) < array_size)
49 std::cerr <<
"The size of array in bytes exceeds the maximum " \
50 <<
"integer limit, which is: " << max_index <<
". The " \
51 <<
"array size is: " << array_size <<
", and the size of " \
52 <<
"data type is: " <<
sizeof(ArrayType) <<
"-bytes." \
57 ArrayType* device_array;
58 size_t num_bytes = array_size *
sizeof(ArrayType);
59 cudaError_t error =
cudaMalloc(&device_array, num_bytes);
60 assert(error == cudaSuccess);
78template <
typename ArrayType>
80 ArrayType*& device_array,
81 const size_t array_size)
85 assert(array_size > 0);
88 size_t max_index = std::numeric_limits<size_t>::max();
89 if (max_index /
sizeof(ArrayType) < array_size)
91 std::cerr <<
"The size of array in bytes exceeds the maximum " \
92 <<
"integer limit, which is: " << max_index <<
". The " \
93 <<
"array size is: " << array_size <<
", and the size of " \
94 <<
"data type is: " <<
sizeof(ArrayType) <<
"-bytes." \
99 size_t num_bytes = array_size *
sizeof(ArrayType);
100 cudaError_t error =
cudaMalloc(&device_array, num_bytes);
101 assert(error == cudaSuccess);
117template <
typename ArrayType>
120 const size_t num_bytes)
124 assert(num_bytes > 0);
126 cudaError_t error =
cudaMalloc(&device_array, num_bytes);
127 assert(error == cudaSuccess);
144template <
typename ArrayType>
146 const ArrayType* host_array,
147 const size_t array_size,
148 ArrayType* device_array)
150 size_t num_bytes = array_size *
sizeof(ArrayType);
151 cudaError_t error =
cudaMemcpy(device_array, host_array, num_bytes,
152 cudaMemcpyHostToDevice);
153 assert(error == cudaSuccess);
168template <
typename ArrayType>
171 if (device_array != NULL)
173 cudaError_t error =
cudaFree(device_array);
174 assert(error == cudaSuccess);
190template<
typename ArrayType>
194 assert(error == cudaSuccess);
208template<
typename ArrayType>
213 assert(error == cudaSuccess);
225#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
229#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
233#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
237#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
241#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
245#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
An interface to the CUDA library to facilitate working with CUDA, such as memory allocation,...
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
static ArrayType * alloc(const size_t array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
static int get_device()
Gets the current device in multi-gpu applications.
static void copy_to_device(const ArrayType *host_array, const size_t array_size, ArrayType *device_array)
Copies memory on host to device memory.
cudaError_t cudaGetDevice(int *device)
Definition of CUDA's cudaGetDevice function using dynamically loaded cudart library.
cudaError_t cudaSetDevice(int device)
Definition of CUDA's cudaSetDevice function using dynamically loaded cudart library.
cudaError_t cudaFree(void *devPtr)
Definition of CUDA's cudaFree function using dynamically loaded cudart library.
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, cudaMemcpyKind kind)
Definition of CUDA's cudaMemcpy function using dynamically loaded cudart library.
cudaError_t cudaMalloc(void **devPtr, size_t size)
Definition of CUDA's cudaMalloc function using dynamically loaded cudart library.