17 #include <cublas_v2.h>
20 #include "../_cu_basic_algebra/cu_vector_operations.h"
21 #include "../_cuda_utilities/cuda_interface.h"
118 template <
typename DataType>
124 const DataType lanczos_tol,
134 if (orthogonalize == 0 || orthogonalize == 1)
139 else if ((orthogonalize < 0) ||
140 (orthogonalize >
static_cast<FlagType>(m)))
148 buffer_size = orthogonalize;
164 cublas_handle, device_r, n);
172 for (j=0; j < m; ++j)
181 cublas_handle, device_r, n, 1.0/initial_beta,
182 &device_V[(j % buffer_size)*n]);
187 cublas_handle, device_r, n, 1.0/beta[j-1],
188 &device_V[(j % buffer_size)*n]);
192 A->
dot(&device_V[(j % buffer_size)*n], device_r);
196 cublas_handle, &device_V[(j % buffer_size)*n], device_r, n);
200 cublas_handle, &device_V[(j % buffer_size)*n], n, alpha[j],
207 cublas_handle, &device_V[((j-1) % buffer_size)*n], n,
208 beta[j-1], device_r);
212 if (orthogonalize != 0)
221 num_ortho = buffer_size;
226 cublas_handle, &device_V[0], n, buffer_size, j%buffer_size,
227 num_ortho, device_r);
232 cublas_handle, device_r, n);
237 if (beta[j] < lanczos_tol * sqrt(n))
261 const float lanczos_tol,
271 const double lanczos_tol,
static void del(void *device_array)
Deletes memory on the GPU device if its pointer is not NULL, then sets the pointer to NULL.
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on the GPU device. This function creates a pointer and returns it.
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory from the host to device memory.
virtual void dot(const DataType *vector, DataType *product)=0
Base class for linear operators. This class serves as interface for all derived classes.
cublasHandle_t get_cublas_handle() const
This function returns a reference to the cublasHandle_t object. The object will be created,...
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
static void copy_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Scales a vector and stores to a new vector.
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Subtracts the scaled input vector from the output vector.
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *vector1, const DataType *vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
template IndexType cu_lanczos_tridiagonalization< double >(cuLinearOperator< double > *A, const double *v, const LongIndexType n, const IndexType m, const double lanczos_tol, const FlagType orthogonalize, double *alpha, double *beta)
template IndexType cu_lanczos_tridiagonalization< float >(cuLinearOperator< float > *A, const float *v, const LongIndexType n, const IndexType m, const float lanczos_tol, const FlagType orthogonalize, float *alpha, float *beta)
IndexType cu_lanczos_tridiagonalization(cuLinearOperator< DataType > *A, const DataType *v, const LongIndexType n, const IndexType m, const DataType lanczos_tol, const FlagType orthogonalize, DataType *alpha, DataType *beta)
Tri-diagonalizes matrix A to T using the start vector v. The parameter m is the Lanczos degree, which will be the siz...