doxygen/html/cu__lanczos__tridiagonalization_8cu_source.html

 /*

  *  SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>

  *  SPDX-License-Identifier: BSD-3-Clause

  *  SPDX-FileType: SOURCE

  *

  *  This program is free software: you can redistribute it and/or modify it

  *  under the terms of the license found in the LICENSE.txt file in the root

  *  directory of this source tree.

  */


 // =======

 // Headers

 // =======


 #include "./cu_lanczos_tridiagonalization.h"

 #include <cublas_v2.h>  // cublasHandle_t

 #include <cmath>  // sqrt

 #include "./cu_orthogonalization.h"  // cuOrthogonalization

 #include "../_cu_basic_algebra/cu_vector_operations.h"  // cuVectorOperations

 #include "../_cuda_utilities/cuda_interface.h"  // alloc, copy_to_device, del


 // ============================

 // cu lanczos tridiagonalization

 // ============================


 // Runs up to m steps of the Lanczos recursion on the linear operator A,
 // starting from the vector v, and writes the coefficients of the resulting
 // tridiagonal matrix into alpha (diagonal) and beta (off-diagonal).
 //
 // Parameters:
 //   A             Linear operator. Supplies the cublas handle and the
 //                 matrix-vector product through A->dot().
 //   v             Start vector of length n on the host. It is copied to the
 //                 device before the iterations begin.
 //   n             Length of the vectors.
 //   m             Maximum number of Lanczos iterations.
 //   lanczos_tol   Per-component tolerance. The iterations stop as soon as
 //                 the norm of the residual drops below lanczos_tol*sqrt(n).
 //   orthogonalize Re-orthogonalization flag:
 //                   0           no Gram-Schmidt step, two buffered vectors,
 //                   1           Gram-Schmidt step with two buffered vectors,
 //                   <0 or >m    keep all m vectors in the buffer,
 //                   otherwise   keep the last `orthogonalize` vectors.
 //   alpha         Output, written at indices 0..(returned size - 1).
 //   beta          Output, written at indices 0..(returned size - 1). Here
 //                 beta[j] means beta[j-1] in the Demmel text.
 //
 // Returns the number of iterations that were performed, which is the size
 // of the tridiagonal matrix (at most m).
 template <typename DataType>
 IndexType cu_lanczos_tridiagonalization(
         cuLinearOperator<DataType>* A,
         const DataType* v,
         const LongIndexType n,
         const IndexType m,
         const DataType lanczos_tol,
         const FlagType orthogonalize,
         DataType* alpha,
         DataType* beta)
 {
     cublasHandle_t cublas_handle = A->get_cublas_handle();

     // Number of most recent orthogonal vectors kept in the cyclic buffer.
     // The order of the tests matters: the values 0 and 1 always win, even
     // when they are also larger than m.
     const bool minimal_buffer = (orthogonalize == 0) || (orthogonalize == 1);
     const bool full_buffer = (orthogonalize < 0) ||
                              (orthogonalize > static_cast<FlagType>(m));

     IndexType buffer_size = orthogonalize;
     if (minimal_buffer)
     {
         // The three-term recursion itself needs two vectors
         buffer_size = 2;
     }
     else if (full_buffer)
     {
         // Full re-orthogonalization, all m vectors are retained
         buffer_size = m;
     }

     // Device storage: the buffer holds buffer_size vectors of length n laid
     // out one after another, and r is the working (residual) vector.
     DataType* device_V = CudaInterface<DataType>::alloc(n * buffer_size);
     DataType* device_r = CudaInterface<DataType>::alloc(n);

     // r starts as a copy of the host vector v
     CudaInterface<DataType>::copy_to_device(v, n, device_r);

     // Norm of the start vector, used to normalize the first buffer vector
     DataType initial_beta = cuVectorOperations<DataType>::euclidean_norm(
             cublas_handle, device_r, n);

     // lanczos_size counts the iterations that have been started. It is
     // always one more than the index j of the iteration being processed.
     IndexType lanczos_size = 0;

     while (lanczos_size < m)
     {
         const IndexType j = lanczos_size;
         ++lanczos_size;

         // Slot of the buffer that receives the j-th vector (cyclic reuse)
         DataType* const v_current = &device_V[(j % buffer_size)*n];

         // Normalize r by the norm found in the previous step and store it
         // as the j-th vector
         const DataType previous_norm = (j == 0) ? initial_beta : beta[j-1];
         cuVectorOperations<DataType>::copy_scaled_vector(
                 cublas_handle, device_r, n, 1.0/previous_norm, v_current);

         // r = A * V[:, j]
         A->dot(v_current, device_r);

         // alpha[j] = <V[:, j], r>
         alpha[j] = cuVectorOperations<DataType>::inner_product(
                 cublas_handle, v_current, device_r, n);

         // r -= alpha[j] * V[:, j]
         cuVectorOperations<DataType>::subtract_scaled_vector(
                 cublas_handle, v_current, n, alpha[j], device_r);

         // r -= beta[j-1] * V[:, j-1]
         if (j > 0)
         {
             cuVectorOperations<DataType>::subtract_scaled_vector(
                     cublas_handle, &device_V[((j-1) % buffer_size)*n], n,
                     beta[j-1], device_r);
         }

         // Optional Gram-Schmidt step against the vectors in the buffer
         if (orthogonalize != 0)
         {
             // Only the slots that have been filled so far take part
             const IndexType num_ortho = (j < buffer_size) ? j+1 : buffer_size;

             cuOrthogonalization<DataType>::gram_schmidt_process(
                     cublas_handle, device_V, n, buffer_size, j % buffer_size,
                     num_ortho, device_r);
         }

         // beta[j] = ||r||
         beta[j] = cuVectorOperations<DataType>::euclidean_norm(
                 cublas_handle, device_r, n);

         // r is treated as the zero vector when each of its n components is
         // below lanczos_tol, i.e. when its norm is below lanczos_tol*sqrt(n)
         if (beta[j] < lanczos_tol * sqrt(n))
         {
             break;
         }
     }

     // Release device memory
     CudaInterface<DataType>::del(device_V);
     CudaInterface<DataType>::del(device_r);

     return lanczos_size;
 }


 // ===============================

 // Explicit template instantiation

 // ===============================


 // lanczos tridiagonalization
 //
 // The function template is defined in this source file, so the two
 // instantiations below make the float and double versions available to
 // code in other translation units.

 // Single precision (DataType = float)
 template IndexType cu_lanczos_tridiagonalization<float>(

         cuLinearOperator<float>* A,

         const float* v,

         const LongIndexType n,

         const IndexType m,

         const float lanczos_tol,

         const FlagType orthogonalize,

         float* alpha,

         float* beta);


 // Double precision (DataType = double)
 template IndexType cu_lanczos_tridiagonalization<double>(

         cuLinearOperator<double>* A,

         const double* v,

         const LongIndexType n,

         const IndexType m,

         const double lanczos_tol,

         const FlagType orthogonalize,

         double* alpha,

         double* beta);

CudaInterface::del
static void del(void *device_array)
Deletes memory on gpu device if its pointer is not NULL, then sets the pointer to NULL.
Definition: cuda_interface.cu:166

CudaInterface::alloc
static ArrayType * alloc(const LongIndexType array_size)
Allocates memory on gpu device. This function creates a pointer and returns it.
Definition: cuda_interface.cu:36

CudaInterface::copy_to_device
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
Definition: cuda_interface.cu:142

cLinearOperator::dot
virtual void dot(const DataType *vector, DataType *product)=0

cuLinearOperator
Base class for linear operators. This class serves as interface for all derived classes.
Definition: cu_linear_operator.h:44

cuLinearOperator::get_cublas_handle
cublasHandle_t get_cublas_handle() const
This function returns a reference to the cublasHandle_t object. The object will be created,...
Definition: cu_linear_operator.cu:168

cuOrthogonalization::gram_schmidt_process
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
Definition: cu_orthogonalization.cu:128

cuVectorOperations::copy_scaled_vector
static void copy_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Scales a vector and stores to a new vector.
Definition: cu_vector_operations.cu:73

cuVectorOperations::subtract_scaled_vector
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Subtracts the scaled input vector from the output vector.
Definition: cu_vector_operations.cu:126

cuVectorOperations::inner_product
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *vector1, const DataType *vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
Definition: cu_vector_operations.cu:166

cuVectorOperations::euclidean_norm
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
Definition: cu_vector_operations.cu:201

cu_lanczos_tridiagonalization< double >
template IndexType cu_lanczos_tridiagonalization< double >(cuLinearOperator< double > *A, const double *v, const LongIndexType n, const IndexType m, const double lanczos_tol, const FlagType orthogonalize, double *alpha, double *beta)

cu_lanczos_tridiagonalization< float >
template IndexType cu_lanczos_tridiagonalization< float >(cuLinearOperator< float > *A, const float *v, const LongIndexType n, const IndexType m, const float lanczos_tol, const FlagType orthogonalize, float *alpha, float *beta)

cu_lanczos_tridiagonalization
IndexType cu_lanczos_tridiagonalization(cuLinearOperator< DataType > *A, const DataType *v, const LongIndexType n, const IndexType m, const DataType lanczos_tol, const FlagType orthogonalize, DataType *alpha, DataType *beta)
Tri-diagonalizes matrix A to T using the start vector v. m is the Lanczos degree, which will be the siz...
Definition: cu_lanczos_tridiagonalization.cu:119

cu_lanczos_tridiagonalization.h

cu_orthogonalization.h

LongIndexType
int LongIndexType
Definition: types.h:60

FlagType
int FlagType
Definition: types.h:68

IndexType
int IndexType
Definition: types.h:65