imate
C++/CUDA Reference
Loading...
Searching...
No Matches
CudaAPI< ArrayType > Class Template Reference

An interface to the CUDA library that facilitates working with CUDA, such as allocating memory and copying data to and from the device. All member functions of this class are public and static, so the class serves as a namespace. More...

#include <cuda_api.h>

Static Public Member Functions

static ArrayType * alloc (const size_t array_size)
 Allocates memory on gpu device. This function creates a pointer and returns it.
 
static void alloc (ArrayType *&device_array, const size_t array_size)
 Allocates memory on gpu device. This function uses an existing given pointer.
 
static void alloc_bytes (void *&device_array, const size_t num_bytes)
 Allocates memory on gpu device. This function uses an existing given pointer.
 
static void copy_to_device (const ArrayType *host_array, const size_t array_size, ArrayType *device_array)
 Copies memory on host to device memory.
 
static void del (void *device_array)
 Deletes memory on gpu device if its pointer is not NULL. The pointer is passed by value, so only the function's local copy is set to NULL; the caller's pointer is not modified.
 
static void set_device (int device_id)
 Sets the current device in multi-gpu applications.
 
static int get_device ()
 Gets the current device in multi-gpu applications.
 

Detailed Description

template<typename ArrayType>
class CudaAPI< ArrayType >

An interface to the CUDA library that facilitates working with CUDA, such as allocating memory and copying data to and from the device. All member functions of this class are public and static, so the class serves as a namespace.

Definition at line 35 of file cuda_api.h.

Member Function Documentation

◆ alloc() [1/2]

template<typename ArrayType >
void CudaAPI< ArrayType >::alloc ( ArrayType *&  device_array,
const size_t  array_size 
)
static

Allocates memory on gpu device. This function uses an existing given pointer.

Parameters
[in,out] device_array: A pointer to the device memory to be allocated.
[in] array_size: Size of the array to be allocated.

Definition at line 79 of file cuda_api.cu.

82{
83 // Check if overflowing might make array_size negative if array_size is
84 // a signed type. For unsigned type, we have no clue at this point.
85 assert(array_size > 0);
86
87 // Check if computing num_bytes will not overflow size_t (unsigned int)
88 size_t max_index = std::numeric_limits<size_t>::max();
89 if (max_index / sizeof(ArrayType) < array_size)
90 {
91 std::cerr << "The size of array in bytes exceeds the maximum " \
92 << "integer limit, which is: " << max_index << ". The " \
93 << "array size is: " << array_size << ", and the size of " \
94 << "data type is: " << sizeof(ArrayType) << "-bytes." \
95 << std::endl;
96 abort();
97 }
98
99 size_t num_bytes = array_size * sizeof(ArrayType);
100 cudaError_t error = cudaMalloc(&device_array, num_bytes);
101 assert(error == cudaSuccess);
102}
cudaError_t cudaMalloc(void **devPtr, size_t size)
Definition of CUDA's cudaMalloc function using dynamically loaded cudart library.

References cudaMalloc().

Here is the call graph for this function:

◆ alloc() [2/2]

template<typename ArrayType >
ArrayType * CudaAPI< ArrayType >::alloc ( const size_t  array_size)
static

Allocates memory on gpu device. This function creates a pointer and returns it.

Parameters
[in] array_size: Size of the array to be allocated.
Returns
A pointer to the allocated 1D array on device.

Definition at line 39 of file cuda_api.cu.

40{
41 // Check if overflowing might make array_size negative if array_size is
42 // a signed type. For unsigned type, we have no clue at this point.
43 assert(array_size > 0);
44
45 // Check if computing num_bytes will not overflow size_t (unsigned int)
46 size_t max_index = std::numeric_limits<size_t>::max();
47 if (max_index / sizeof(ArrayType) < array_size)
48 {
49 std::cerr << "The size of array in bytes exceeds the maximum " \
50 << "integer limit, which is: " << max_index << ". The " \
51 << "array size is: " << array_size << ", and the size of " \
52 << "data type is: " << sizeof(ArrayType) << "-bytes." \
53 << std::endl;
54 abort();
55 }
56
57 ArrayType* device_array;
58 size_t num_bytes = array_size * sizeof(ArrayType);
59 cudaError_t error = cudaMalloc(&device_array, num_bytes);
60 assert(error == cudaSuccess);
61
62 return device_array;
63}

References cudaMalloc().

Referenced by cuCSCMatrix< DataType >::copy_host_to_device(), cuCSRMatrix< DataType >::copy_host_to_device(), cuDenseMatrix< DataType >::copy_host_to_device(), cu_golub_kahn_bidiagonalization(), and cu_lanczos_tridiagonalization().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ alloc_bytes()

template<typename ArrayType >
void CudaAPI< ArrayType >::alloc_bytes ( void *&  device_array,
const size_t  num_bytes 
)
static

Allocates memory on gpu device. This function uses an existing given pointer.

Parameters
[in,out] device_array: A pointer to the device memory to be allocated.
[in] num_bytes: Number of bytes of the array to be allocated.

Definition at line 118 of file cuda_api.cu.

121{
122 // Check if overflowing might make num_bytes negative if size_t is
123 // a signed type. For unsigned type, we have no clue at this point.
124 assert(num_bytes > 0);
125
126 cudaError_t error = cudaMalloc(&device_array, num_bytes);
127 assert(error == cudaSuccess);
128}

References cudaMalloc().

Referenced by cuCSCMatrix< DataType >::allocate_buffer(), and cuCSRMatrix< DataType >::allocate_buffer().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copy_to_device()

template<typename ArrayType >
void CudaAPI< ArrayType >::copy_to_device ( const ArrayType *  host_array,
const size_t  array_size,
ArrayType *  device_array 
)
static

Copies memory on host to device memory.

Parameters
[in] host_array: Pointer to the 1D array memory on host.
[in] array_size: The size of the array on host.
[out] device_array: Pointer to the destination memory on device.

Definition at line 145 of file cuda_api.cu.

149{
150 size_t num_bytes = array_size * sizeof(ArrayType);
151 cudaError_t error = cudaMemcpy(device_array, host_array, num_bytes,
152 cudaMemcpyHostToDevice);
153 assert(error == cudaSuccess);
154}
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, cudaMemcpyKind kind)
Definition of CUDA's cudaMemcpy function using dynamically loaded cudart library.

References cudaMemcpy().

Referenced by cuCSCMatrix< DataType >::copy_host_to_device(), cuCSRMatrix< DataType >::copy_host_to_device(), cuDenseMatrix< DataType >::copy_host_to_device(), cu_golub_kahn_bidiagonalization(), cu_lanczos_tridiagonalization(), and cuOrthogonalization< DataType >::orthogonalize_vectors().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ del()

template<typename ArrayType >
void CudaAPI< ArrayType >::del ( void *  device_array)
static

Deletes memory on gpu device if its pointer is not NULL. The pointer is passed by value, so only the function's local copy is set to NULL; the caller's pointer is not modified.

Parameters
[in] device_array: A pointer to memory on device to be deleted. Because the pointer is passed by value, the caller's copy is not set to NULL by this function.

Definition at line 169 of file cuda_api.cu.

170{
171 if (device_array != NULL)
172 {
173 cudaError_t error = cudaFree(device_array);
174 assert(error == cudaSuccess);
175 device_array = NULL;
176 }
177}
cudaError_t cudaFree(void *devPtr)
Definition of CUDA's cudaFree function using dynamically loaded cudart library.

References cudaFree().

Referenced by cuCSCMatrix< DataType >::allocate_buffer(), cuCSRMatrix< DataType >::allocate_buffer(), cu_golub_kahn_bidiagonalization(), cu_lanczos_tridiagonalization(), cuCSCMatrix< DataType >::~cuCSCMatrix(), cuCSRMatrix< DataType >::~cuCSRMatrix(), and cuDenseMatrix< DataType >::~cuDenseMatrix().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_device()

template<typename ArrayType >
int CudaAPI< ArrayType >::get_device ( )
static

Gets the current device in multi-gpu applications.

Returns
device_id The id of the current device. The id is a number from 0 to num_gpu_devices-1

Definition at line 209 of file cuda_api.cu.

210{
211 int device_id = -1;
212 cudaError_t error = cudaGetDevice(&device_id);
213 assert(error == cudaSuccess);
214
215 return device_id;
216}
cudaError_t cudaGetDevice(int *device)
Definition of CUDA's cudaGetDevice function using dynamically loaded cudart library.

References cudaGetDevice().

Referenced by cuAffineMatrixFunction< DataType >::_add_scaled_vector(), cuCSCMatrix< DataType >::dot(), cuCSRMatrix< DataType >::dot(), cuDenseMatrix< DataType >::dot(), cuCSCMatrix< DataType >::dot_plus(), cuCSRMatrix< DataType >::dot_plus(), cuDenseMatrix< DataType >::dot_plus(), cuLinearOperator< DataType >::get_cublas_handle(), cuCSCMatrix< DataType >::transpose_dot(), cuCSRMatrix< DataType >::transpose_dot(), cuDenseMatrix< DataType >::transpose_dot(), cuCSCMatrix< DataType >::transpose_dot_plus(), cuCSRMatrix< DataType >::transpose_dot_plus(), and cuDenseMatrix< DataType >::transpose_dot_plus().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_device()

template<typename ArrayType >
void CudaAPI< ArrayType >::set_device ( int  device_id)
static

Sets the current device in multi-gpu applications.

Parameters
[in] device_id: The id of the device to switch to. The id is a number from 0 to num_gpu_devices-1.

Definition at line 191 of file cuda_api.cu.

192{
193 cudaError_t error = cudaSetDevice(device_id);
194 assert(error == cudaSuccess);
195}
cudaError_t cudaSetDevice(int device)
Definition of CUDA's cudaSetDevice function using dynamically loaded cudart library.

References cudaSetDevice().

Referenced by cuCSCMatrix< DataType >::copy_host_to_device(), cuCSRMatrix< DataType >::copy_host_to_device(), cuDenseMatrix< DataType >::copy_host_to_device(), cuTraceEstimator< DataType >::cu_trace_estimator(), cuLinearOperator< DataType >::initialize_cublas_handle(), cuLinearOperator< DataType >::initialize_cusparse_handle(), cuCSCMatrix< DataType >::~cuCSCMatrix(), cuCSRMatrix< DataType >::~cuCSRMatrix(), cuDenseMatrix< DataType >::~cuDenseMatrix(), and cuLinearOperator< DataType >::~cuLinearOperator().

Here is the call graph for this function:
Here is the caller graph for this function:

The documentation for this class was generated from the following files: