imate
C++/CUDA Reference
cu_linear_operator.cu
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Headers
14 // =======
15 
16 #include "./cu_linear_operator.h"
17 #include <omp.h> // omp_set_num_threads
18 #include <cstddef> // NULL
19 #include <cassert> // assert
20 #include <cstdlib> // abort
21 #include <iostream>
22 #include "../_cuda_utilities/cuda_interface.h" // CudaInterface
23 
24 
25 // =============
26 // constructor 1
27 // =============
28 
29 template <typename DataType>
31 
32  // Initializer list
33  num_gpu_devices(0),
34  copied_host_to_device(false),
35  cublas_handle(NULL),
36  cusparse_handle(NULL)
37 {
38  // Check any gpu device exists
39  this->num_gpu_devices = this->query_gpu_devices();
40 
41  // Regardless of using dense (cublas) or sparse (cusparse) matrices, the
42  // cublas handle should be initialized, since it is needed for the methods
43  // in cuVectorOperations
45 }
46 
47 
48 // =============
49 // constructor 2
50 // =============
51 
58 
59 template <typename DataType>
61  const IndexType num_gpu_devices_):
62 
63  // Initializer list
64  num_gpu_devices(0),
65  copied_host_to_device(false),
66  cublas_handle(NULL),
67  cusparse_handle(NULL)
68 {
69  // Check any gpu device exists
70  int device_count = this->query_gpu_devices();
71 
72  // Set number of gpu devices
73  if (num_gpu_devices_ == 0)
74  {
75  this->num_gpu_devices = device_count;
76  }
77  else if (num_gpu_devices_ > device_count)
78  {
79  std::cerr << "ERROR: Number of requested gpu devices exceeds the " \
80  << "number of available gpu devices. Nummber of detected " \
81  << "devices are " << device_count << " while the " \
82  << "requested number of devices are " << num_gpu_devices_ \
83  << "." << std::endl;
84  abort();
85  }
86  else
87  {
88  this->num_gpu_devices = num_gpu_devices_;
89  }
90 
91  // Regardless of using dense (cublas) or sparse (cusparse) matrices, the
92  // cublas handle should be initialized, since it is needed for the methods
93  // in cuVectorOperations
95 }
96 
97 
98 // ==========
99 // destructor
100 // ==========
101 
102 template <typename DataType>
104 {
105  // cublas handle
106  if (this->cublas_handle != NULL)
107  {
108  // Set the number of threads
109  omp_set_num_threads(this->num_gpu_devices);
110 
111  #pragma omp parallel
112  {
113  // Switch to a device with the same device id as the cpu thread id
114  unsigned int thread_id = omp_get_thread_num();
116 
117  cublasStatus_t status = cublasDestroy(
118  this->cublas_handle[thread_id]);
119  assert(status == CUBLAS_STATUS_SUCCESS);
120  }
121 
122  // Deallocate arrays of pointers on cpu
123  delete[] this->cublas_handle;
124  this->cublas_handle = NULL;
125  }
126 
127  // cusparse handle
128  if (this->cusparse_handle != NULL)
129  {
130  // Set the number of threads
131  omp_set_num_threads(this->num_gpu_devices);
132 
133  #pragma omp parallel
134  {
135  // Switch to a device with the same device id as the cpu thread id
136  unsigned int thread_id = omp_get_thread_num();
138 
139  cusparseStatus_t status = cusparseDestroy(
140  this->cusparse_handle[thread_id]);
141  assert(status == CUSPARSE_STATUS_SUCCESS);
142  }
143 
144  // Deallocate arrays of pointers on cpu
145  delete[] this->cusparse_handle;
146  this->cusparse_handle = NULL;
147  }
148 }
149 
150 
151 // =================
152 // get cublas handle
153 // =================
154 
166 
167 template <typename DataType>
169 {
170  // Get device id
171  int device_id = CudaInterface<DataType>::get_device();
172 
173  return this->cublas_handle[device_id];
174 }
175 
176 
177 // ========================
178 // initialize cublas handle
179 // ========================
180 
183 
184 template <typename DataType>
186 {
187  if (this->cublas_handle == NULL)
188  {
189  // Allocate pointers for each gpu device
190  this->cublas_handle = new cublasHandle_t[this->num_gpu_devices];
191 
192  // Set the number of threads
193  omp_set_num_threads(this->num_gpu_devices);
194 
195  #pragma omp parallel
196  {
197  // Switch to a device with the same device id as the cpu thread id
198  unsigned int thread_id = omp_get_thread_num();
200 
201  cublasStatus_t status = cublasCreate(
202  &this->cublas_handle[thread_id]);
203  assert(status == CUBLAS_STATUS_SUCCESS);
204  }
205  }
206 }
207 
208 
209 // ==========================
210 // initialize cusparse handle
211 // ==========================
212 
215 
216 template <typename DataType>
218 {
219  if (this->cusparse_handle == NULL)
220  {
221  // Allocate pointers for each gpu device
222  this->cusparse_handle = new cusparseHandle_t[this->num_gpu_devices];
223 
224  // Set the number of threads
225  omp_set_num_threads(this->num_gpu_devices);
226 
227  #pragma omp parallel
228  {
229  // Switch to a device with the same device id as the cpu thread id
230  unsigned int thread_id = omp_get_thread_num();
232 
233  cusparseStatus_t status = cusparseCreate(
234  &this->cusparse_handle[thread_id]);
235  assert(status == CUSPARSE_STATUS_SUCCESS);
236  }
237  }
238 }
239 
240 
241 // =================
242 // query gpu devices
243 // =================
244 
250 
251 template <typename DataType>
253 {
254  int device_count = 0;
255  cudaError_t error = cudaGetDeviceCount(&device_count);
256 
257  // Error code 38 means no cuda-capable device was detected.
258  if ((error != cudaSuccess) || (device_count < 1))
259  {
260  std::cerr << "ERROR: No cuda-capable GPU device was detected on " \
261  << "this machine. If a cuda-capable GPU device exists, " \
262  << "install its cuda driver. Alternatively, set " \
263  << "'gpu=False' to use cpu instead." \
264  << std::endl;
265 
266  abort();
267  }
268 
269  return device_count;
270 }
271 
272 
273 // ===============================
274 // Explicit template instantiation
275 // ===============================
276 
// Instantiate the class for single and double precision, so that the member
// definitions in this file are generated for both data types.
277 template class cuLinearOperator<float>;
278 template class cuLinearOperator<double>;
static int get_device()
Gets the current device in multi-gpu applications.
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Base class for linear operators. This class serves as interface for all derived classes.
void initialize_cusparse_handle()
Creates a cusparseHandle_t object, if not created already.
int query_gpu_devices() const
Before any numerical computation, this method checks if any gpu device is available on the machine,...
cublasHandle_t get_cublas_handle() const
This function returns a reference to the cublasHandle_t object. The object will be created,...
void initialize_cublas_handle()
Creates a cublasHandle_t object, if not created already.
cudaError_t cudaGetDeviceCount(int *count)
Definition of CUDA's cudaGetDeviceCount function using dynamically loaded cudart library.
cusparseStatus_t cusparseDestroy(cusparseHandle_t handle)
Definition of CUDA's cusparseDestroy function using dynamically loaded cusparse library.
cusparseStatus_t cusparseCreate(cusparseHandle_t *handle)
Definition of CUDA's cusparseCreate function using dynamically loaded cusparse library.
int IndexType
Definition: types.h:65