17 #include <cublas_v2.h>
19 #include "../_cu_basic_algebra/cu_vector_operations.h"
20 #include "../_cu_trace_estimator/cu_orthogonalization.h"
21 #include "../_cuda_utilities/cuda_interface.h"
112 template <
typename DataType>
118 const DataType lanczos_tol,
128 if (orthogonalize == 0)
133 else if ((orthogonalize < 0) ||
134 (orthogonalize >
static_cast<FlagType>(m) - 1))
143 buffer_size = orthogonalize + 1;
155 cublas_handle, &device_V[0], n);
163 for (j=0; j < m; ++j)
169 A->
dot(&device_V[(j % buffer_size)*n], &device_U[(j % buffer_size)*n]);
176 &device_U[((j-1) % buffer_size)*n], n, beta[j-1],
177 &device_U[(j % buffer_size)*n]);
181 if (orthogonalize != 0)
190 num_ortho = buffer_size - 1;
197 cublas_handle, &device_U[0], n, buffer_size,
198 (j-1)%buffer_size, num_ortho,
199 &device_U[(j % buffer_size)*n]);
205 cublas_handle, &device_U[(j % buffer_size)*n], n);
209 &device_V[((j+1) % buffer_size)*n]);
213 cublas_handle, &device_V[(j % buffer_size)*n], n, alpha[j],
214 &device_V[((j+1) % buffer_size)*n]);
217 if (orthogonalize != 0)
220 cublas_handle, &device_V[0], n, buffer_size, j%buffer_size,
221 num_ortho, &device_V[((j+1) % buffer_size)*n]);
226 cublas_handle, &device_V[((j+1) % buffer_size)*n], n);
231 if (beta[j] < lanczos_tol * sqrt(n))
255 const float lanczos_tol,
265 const double lanczos_tol,
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
virtual void transpose_dot(const DataType *vector, DataType *product)=0
virtual void dot(const DataType *vector, DataType *product)=0
Base class for linear operators. This class serves as interface for all derived classes.
cublasHandle_t get_cublas_handle() const
This function returns a reference to the cublasHandle_t object. The object will be created,...
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Subtracts the scaled input vector from the output vector.
static DataType normalize_vector_in_place(cublasHandle_t cublas_handle, DataType *vector, const LongIndexType vector_size)
Normalizes a vector based on Euclidean 2-norm. The result is written in-place.
template IndexType cu_golub_kahn_bidiagonalization< double >(cuLinearOperator< double > *A, const double *v, const LongIndexType n, const IndexType m, const double lanczos_tol, const FlagType orthogonalize, double *alpha, double *beta)
IndexType cu_golub_kahn_bidiagonalization(cuLinearOperator< DataType > *A, const DataType *v, const LongIndexType n, const IndexType m, const DataType lanczos_tol, const FlagType orthogonalize, DataType *alpha, DataType *beta)
Bi-diagonalizes the positive-definite matrix A using Golub-Kahn-Lanczos method.
template IndexType cu_golub_kahn_bidiagonalization< float >(cuLinearOperator< float > *A, const float *v, const LongIndexType n, const IndexType m, const float lanczos_tol, const FlagType orthogonalize, float *alpha, float *beta)