17#include "../_definitions/definitions.h"
18#include "../_cu_definitions/cu_types.h"
21#if defined(USE_OPENMP) && (USE_OPENMP == 1)
29#include "../_cuda_utilities/cuda_api.h"
39template <
typename DataType>
44 copied_host_to_device(false),
72template <
typename DataType>
74 const int num_gpu_devices_):
78 copied_host_to_device(false),
86 if (num_gpu_devices_ == 0)
90 else if (num_gpu_devices_ > device_count)
92 std::cerr <<
"ERROR: Number of requested gpu devices exceeds the " \
93 <<
"number of available gpu devices. Nummber of detected " \
94 <<
"devices are " << device_count <<
" while the " \
95 <<
"requested number of devices are " << num_gpu_devices_ \
119template <
typename DataType>
123 if (this->cublas_handle != NULL)
126 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
130 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
135 unsigned int thread_id;
136 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
144 cublasStatus_t status = cublasDestroy(
145 this->cublas_handle[thread_id]);
146 assert(status == CUBLAS_STATUS_SUCCESS);
150 delete[] this->cublas_handle;
151 this->cublas_handle = NULL;
155 if (this->cusparse_handle != NULL)
158 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
162 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
167 unsigned int thread_id;
168 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
177 this->cusparse_handle[thread_id]);
178 assert(status == CUSPARSE_STATUS_SUCCESS);
182 delete[] this->cusparse_handle;
183 this->cusparse_handle = NULL;
204template <
typename DataType>
210 return this->cublas_handle[device_id];
221template <
typename DataType>
224 if (this->cublas_handle == NULL)
227 this->cublas_handle =
new cublasHandle_t[this->num_gpu_devices];
230 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
234 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
239 unsigned int thread_id;
240 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
248 cublasStatus_t status_create = cublasCreate(
249 &this->cublas_handle[thread_id]);
250 assert(status_create == CUBLAS_STATUS_SUCCESS);
254 this->cublas_handle[thread_id], CUBLAS_TENSOR_OP_MATH);
255 assert(status_set == CUBLAS_STATUS_SUCCESS);
268template <
typename DataType>
271 if (this->cusparse_handle == NULL)
274 this->cusparse_handle =
new cusparseHandle_t[this->num_gpu_devices];
277 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
281 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
286 unsigned int thread_id;
287 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
296 &this->cusparse_handle[thread_id]);
297 assert(status == CUSPARSE_STATUS_SUCCESS);
313template <
typename DataType>
316 int device_count = 0;
320 if ((error != cudaSuccess) || (device_count < 1))
322 std::cerr <<
"ERROR: No cuda-capable GPU device was detected on " \
323 <<
"this machine. If a cuda-capable GPU device exists, " \
324 <<
"install its cuda driver. Alternatively, set " \
325 <<
"'gpu=False' to use cpu instead." \
346template <
typename DataType>
349 this->parameters = parameters_;
357#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
361#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
365#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
369#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
373#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
377#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
static int get_device()
Gets the current device in multi-gpu applications.
Base class for linear operators. This class serves as interface for all derived classes.
cuLinearOperator()
Default constructor.
void initialize_cusparse_handle()
Creates a cusparseHandle_t object, if not created already.
int query_gpu_devices() const
Before any numerical computation, this method checks if any gpu device is available on the machine,...
cublasHandle_t get_cublas_handle() const
This function returns a reference to the cublasHandle_t object. The object will be created,...
virtual ~cuLinearOperator()
Destructor.
void initialize_cublas_handle()
Creates a cublasHandle_t object, if not created already.
void set_parameters(DataType *parameters_)
Sets the scalar parameter this->parameters. Parameter is initialized to NULL. However,...
void omp_set_num_threads(int num_threads)
cublasStatus_t cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode)
Definition of CUDA's cublasSetMathMode function using dynamically loaded cublas library.
cudaError_t cudaGetDeviceCount(int *count)
Definition of CUDA's cudaGetDeviceCount function using dynamically loaded cudart library.
cusparseStatus_t cusparseDestroy(cusparseHandle_t handle)
Definition of CUDA's cusparseDestroy function using dynamically loaded cusparse library.
cusparseStatus_t cusparseCreate(cusparseHandle_t *handle)
Definition of CUDA's cusparseCreate function using dynamically loaded cusparse library.