imate
C++/CUDA Reference
cuda_interface.cu
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Headers
14 // =======
15 
16 #include "./cuda_interface.h"
17 #include <cassert> // assert
18 #include <iostream> // std::cerr
19 #include <cstdlib> // abort
20 #include <limits> // std::numeric_limits
21 
22 
23 // =======
24 // alloc 1
25 // =======
26 
34 
/// \brief      Allocates an array of \c array_size elements of type
///             \c ArrayType on the GPU device and returns a pointer to it.
///
/// \details    The process is aborted, with a message on \c std::cerr, if the
///             size in bytes cannot be represented by \c size_t or if
///             \c cudaMalloc reports an error. The returned memory should be
///             released with \c CudaInterface::del.
///
/// \param[in]  array_size
///             Number of elements (not bytes) to allocate. Must be positive.
/// \return     Pointer to the newly allocated device memory.

template <typename ArrayType>
ArrayType* CudaInterface<ArrayType>::alloc(const LongIndexType array_size)
{
    // A non-positive size is a caller bug (for instance, an overflow that
    // wrapped a signed LongIndexType to a negative value).
    assert(array_size > 0);

    // Check that computing num_bytes will not overflow size_t. The cast is
    // explicit so that the comparison is done between unsigned values.
    const size_t max_index = std::numeric_limits<size_t>::max();
    const size_t num_elements = static_cast<size_t>(array_size);
    if (num_elements > max_index / sizeof(ArrayType))
    {
        std::cerr << "The size of array in bytes exceeds the maximum "
                  << "integer limit, which is: " << max_index << ". The "
                  << "array size is: " << array_size << ", and the size of "
                  << "data type is: " << sizeof(ArrayType) << "-bytes."
                  << std::endl;
        abort();
    }

    // Initialized so that an indeterminate pointer can never be returned.
    ArrayType* device_array = NULL;
    const size_t num_bytes = num_elements * sizeof(ArrayType);
    cudaError_t error = cudaMalloc(&device_array, num_bytes);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to allocate " << num_bytes << " bytes on the "
                  << "gpu device. CUDA error code: "
                  << static_cast<int>(error) << "." << std::endl;
        abort();
    }

    return device_array;
}
61 
62 
63 // =======
64 // alloc 2
65 // =======
66 
74 
/// \brief      Allocates an array of \c array_size elements of type
///             \c ArrayType on the GPU device, storing the address in an
///             existing pointer supplied by the caller.
///
/// \details    The process is aborted, with a message on \c std::cerr, if the
///             size in bytes cannot be represented by \c size_t or if
///             \c cudaMalloc reports an error.
///
/// \param[out] device_array
///             Pointer that receives the address of the allocated device
///             memory. Its previous value is overwritten and is not freed.
/// \param[in]  array_size
///             Number of elements (not bytes) to allocate. Must be positive.

template <typename ArrayType>
void CudaInterface<ArrayType>::alloc(
        ArrayType*& device_array,
        const LongIndexType array_size)
{
    // A non-positive size is a caller bug (for instance, an overflow that
    // wrapped a signed LongIndexType to a negative value).
    assert(array_size > 0);

    // Check that computing num_bytes will not overflow size_t. The cast is
    // explicit so that the comparison is done between unsigned values.
    const size_t max_index = std::numeric_limits<size_t>::max();
    const size_t num_elements = static_cast<size_t>(array_size);
    if (num_elements > max_index / sizeof(ArrayType))
    {
        std::cerr << "The size of array in bytes exceeds the maximum "
                  << "integer limit, which is: " << max_index << ". The "
                  << "array size is: " << array_size << ", and the size of "
                  << "data type is: " << sizeof(ArrayType) << "-bytes."
                  << std::endl;
        abort();
    }

    const size_t num_bytes = num_elements * sizeof(ArrayType);
    cudaError_t error = cudaMalloc(&device_array, num_bytes);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to allocate " << num_bytes << " bytes on the "
                  << "gpu device. CUDA error code: "
                  << static_cast<int>(error) << "." << std::endl;
        abort();
    }
}
100 
101 
102 // ===========
103 // alloc bytes
104 // ===========
105 
113 
/// \brief      Allocates \c num_bytes bytes on the GPU device, storing the
///             address in an existing pointer supplied by the caller.
///
/// \details    The process is aborted, with a message on \c std::cerr, if
///             \c cudaMalloc reports an error.
///
/// \param[out] device_array
///             Pointer that receives the address of the allocated device
///             memory. Its previous value is overwritten and is not freed.
/// \param[in]  num_bytes
///             Number of bytes to allocate. Must be non-zero.

template <typename ArrayType>
void CudaInterface<ArrayType>::alloc_bytes(
        void*& device_array,
        const size_t num_bytes)
{
    // size_t is unsigned, so num_bytes can never be negative; this only
    // rejects a zero-byte request.
    assert(num_bytes > 0);

    cudaError_t error = cudaMalloc(&device_array, num_bytes);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to allocate " << num_bytes << " bytes on the "
                  << "gpu device. CUDA error code: "
                  << static_cast<int>(error) << "." << std::endl;
        abort();
    }
}
126 
127 
128 // ==============
129 // copy to device
130 // ==============
131 
140 
/// \brief      Copies \c array_size elements from an array on the host to an
///             array on the GPU device.
///
/// \details    The copy is made with \c cudaMemcpy using the
///             \c cudaMemcpyHostToDevice direction. The process is aborted,
///             with a message on \c std::cerr, if the size in bytes cannot be
///             represented by \c size_t or if \c cudaMemcpy reports an error.
///
/// \param[in]  host_array
///             Source array on the host.
/// \param[in]  array_size
///             Number of elements (not bytes) to copy.
/// \param[out] device_array
///             Destination array on the device. It must already be allocated
///             with room for at least \c array_size elements.

template <typename ArrayType>
void CudaInterface<ArrayType>::copy_to_device(
        const ArrayType* host_array,
        const LongIndexType array_size,
        ArrayType* device_array)
{
    // Guard the multiplication below against overflowing size_t. A negative
    // array_size (signed LongIndexType) converts to a very large unsigned
    // value and is rejected by the same test.
    const size_t max_index = std::numeric_limits<size_t>::max();
    const size_t num_elements = static_cast<size_t>(array_size);
    if (num_elements > max_index / sizeof(ArrayType))
    {
        std::cerr << "The size of array in bytes exceeds the maximum "
                  << "integer limit, which is: " << max_index << ". The "
                  << "array size is: " << array_size << ", and the size of "
                  << "data type is: " << sizeof(ArrayType) << "-bytes."
                  << std::endl;
        abort();
    }

    const size_t num_bytes = num_elements * sizeof(ArrayType);
    cudaError_t error = cudaMemcpy(device_array, host_array, num_bytes,
                                   cudaMemcpyHostToDevice);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to copy " << num_bytes << " bytes from host to "
                  << "the gpu device. CUDA error code: "
                  << static_cast<int>(error) << "." << std::endl;
        abort();
    }
}
152 
153 
154 // ===
155 // del
156 // ===
157 
164 
/// \brief      Frees memory on the GPU device if the given pointer is not
///             \c NULL.
///
/// \details    The pointer is received by value, so this function cannot
///             reset the caller's pointer. The caller should set its own
///             pointer to \c NULL after this call to avoid freeing the same
///             memory twice. The process is aborted, with a message on
///             \c std::cerr, if \c cudaFree reports an error.
///
/// \param[in]  device_array
///             Pointer to the device memory to free. A \c NULL pointer is
///             ignored.

template <typename ArrayType>
void CudaInterface<ArrayType>::del(void* device_array)
{
    // Nothing to free.
    if (device_array == NULL)
    {
        return;
    }

    cudaError_t error = cudaFree(device_array);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to free memory on the gpu device. CUDA error "
                  << "code: " << static_cast<int>(error) << "." << std::endl;
        abort();
    }

    // Note: assigning NULL to device_array here would only change the local
    // copy of the pointer, so no such assignment is made.
}
175 
176 
177 // ==========
178 // set device
179 // ==========
180 
186 
/// \brief      Sets the current GPU device, for use in multi-gpu applications.
///
/// \details    The process is aborted, with a message on \c std::cerr, if
///             \c cudaSetDevice reports an error.
///
/// \param[in]  device_id
///             Id of the device to be passed to \c cudaSetDevice.

template<typename ArrayType>
void CudaInterface<ArrayType>::set_device(int device_id)
{
    cudaError_t error = cudaSetDevice(device_id);

    // Checked explicitly (not with assert) so that the failure is also caught
    // when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to set the gpu device to id: " << device_id
                  << ". CUDA error code: " << static_cast<int>(error) << "."
                  << std::endl;
        abort();
    }
}
193 
194 
195 // ==========
196 // get device
197 // ==========
198 
204 
/// \brief      Gets the current GPU device, for use in multi-gpu applications.
///
/// \details    The process is aborted, with a message on \c std::cerr, if
///             \c cudaGetDevice reports an error.
///
/// \return     Id of the device as reported by \c cudaGetDevice.

template<typename ArrayType>
int CudaInterface<ArrayType>::get_device()
{
    // Starts from an invalid id; overwritten by cudaGetDevice on success.
    int device_id = -1;
    cudaError_t error = cudaGetDevice(&device_id);

    // Checked explicitly (not with assert) so that a failure is not silently
    // turned into a returned id of -1 when the code is compiled with NDEBUG.
    if (error != cudaSuccess)
    {
        std::cerr << "Failed to get the current gpu device. CUDA error "
                  << "code: " << static_cast<int>(error) << "." << std::endl;
        abort();
    }

    return device_id;
}
214 
215 
216 // ===============================
217 // Explicit template instantiation
218 // ===============================
219 
// The member functions above are defined in this source file rather than in
// the header, so the class template is explicitly instantiated here for each
// element type listed below.
220 template class CudaInterface<LongIndexType>;
221 template class CudaInterface<float>;
222 template class CudaInterface<double>;
An interface to the CUDA library to facilitate working with CUDA, such as memory allocation,...
static int get_device()
Gets the current device in multi-gpu applications.
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL. The pointer is passed by value, so the caller's pointer is not modified and should be reset to NULL by the caller.
static void alloc_bytes(void *&device_array, const size_t num_bytes)
Allocates memory on gpu device. This function uses an existing given pointer.
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
cudaError_t cudaGetDevice(int *device)
Definition of CUDA's cudaGetDevice function using dynamically loaded cudart library.
cudaError_t cudaSetDevice(int device)
Definition of CUDA's cudaSetDevice function using dynamically loaded cudart library.
cudaError_t cudaFree(void *devPtr)
Definition of CUDA's cudaFree function using dynamically loaded cudart library.
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, cudaMemcpyKind kind)
Definition of CUDA's cudaMemcpy function using dynamically loaded cudart library.
cudaError_t cudaMalloc(void **devPtr, size_t size)
Definition of CUDA's cudaMalloc function using dynamically loaded cudart library.
int LongIndexType
Definition: types.h:60