doxygen/html/cu__orthogonalization_8cu_source.html

 /*

  *  SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>

  *  SPDX-License-Identifier: BSD-3-Clause

  *  SPDX-FileType: SOURCE

  *

  *  This program is free software: you can redistribute it and/or modify it

  *  under the terms of the license found in the LICENSE.txt file in the root

  *  directory of this source tree.

  */


 // =======

 // Imports

 // =======


 #include "./cu_orthogonalization.h"

 #include <cstdlib>  // abort, NULL

 #include <iostream>  // std::cerr, std::endl

 #include <cmath>  // sqrt, std::fabs

 #include <limits>  // std::numeric_limits

 #include "../_cu_basic_algebra/cu_vector_operations.h"  // cuVectorOperations

 #include "../_random_generator/random_array_generator.h"  // RandomArrayGene...

 #include "../_random_generator/random_number_generator.h"  // RandomNumberGe...

 #include "../_cuda_utilities/cuda_interface.h"  // CudaInterface


 // ====================

 // gram schmidt process

 // ====================


 /// \brief Orthogonalizes the vector \c v against a window of the column
 ///        vectors stored in \c V using Gram-Schmidt projections.
 ///
 /// Starting from column <tt>last_vector % num_vectors</tt> and walking
 /// backward (wrapping around to the last column when the index would drop
 /// below zero), the projection of \c v onto each visited column is subtracted
 /// from \c v in place.
 ///
 /// \param[in]     cublas_handle  cuBLAS handle forwarded to the
 ///                               cuVectorOperations routines.
 /// \param[in]     V              Column vectors stored contiguously; the i-th
 ///                               column starts at <tt>V[vector_size*i]</tt>.
 ///                               Presumably a device array, since it is only
 ///                               handed to cuBLAS-based helpers -- confirm
 ///                               against the caller.
 /// \param[in]     vector_size    Number of elements of each vector.
 /// \param[in]     num_vectors    Number of column vectors held in \c V.
 /// \param[in]     last_vector    Index of the most recent column; it is
 ///                               reduced modulo \c num_vectors.
 /// \param[in]     num_ortho      \c 0: do nothing. Negative or larger than
 ///                               \c num_vectors: use all columns. Otherwise,
 ///                               use only the last \c num_ortho columns.
 /// \param[in,out] v              Vector that is orthogonalized in place.
 template <typename DataType>
 void cuOrthogonalization<DataType>::gram_schmidt_process(
         cublasHandle_t cublas_handle,
         const DataType* V,
         const LongIndexType vector_size,
         const IndexType num_vectors,
         const IndexType last_vector,
         const FlagType num_ortho,
         DataType* v)
 {
     // Orthogonalization is disabled, or there are too few columns to use.
     if ((num_ortho == 0) || (num_vectors < 2))
     {
         return;
     }

     // A negative or an oversized num_ortho selects every column of V,
     // otherwise only the num_ortho most recent columns are visited.
     const bool use_all_columns =
         (num_ortho < 0) || (num_ortho > static_cast<FlagType>(num_vectors));
     IndexType window = use_all_columns ?
         num_vectors : static_cast<IndexType>(num_ortho);

     // A vector space of dimension vector_size has at most vector_size
     // independent directions, so the window is capped at that size.
     if (window > static_cast<IndexType>(vector_size))
     {
         window = vector_size;
     }

     const DataType eps = std::numeric_limits<DataType>::epsilon();

     // Column index from which the backward walk starts
     const IndexType newest = last_vector % num_vectors;

     for (IndexType back = 0; back < window; ++back)
     {
         // Column of V to orthogonalize v against. Indices that would fall
         // below zero wrap around to the end of the columns.
         const IndexType column = (newest >= back) ?
             (newest - back) : (newest - back + num_vectors);
         const DataType* basis = V + vector_size*column;

         // Norm of the selected column
         const DataType basis_norm =
             cuVectorOperations<DataType>::euclidean_norm(
                 cublas_handle, basis, vector_size);

         // A (numerically) zero column defines no direction to project on
         if (basis_norm < eps * sqrt(vector_size))
         {
             std::cerr << "WARNING: norm of the given vector is too small. "
                       << "Cannot orthogonalize against zero vector. "
                       << "Skipping." << std::endl;
             continue;
         }

         // Projection coefficient of v on the selected column
         const DataType projection =
             cuVectorOperations<DataType>::inner_product(
                 cublas_handle, basis, v, vector_size);
         const DataType scale = projection / (basis_norm * basis_norm);

         // A coefficient of one hints that v and the column may be the same
         // vector (or very close to each other).
         if (std::abs(scale - 1.0) <= 2.0 * eps)
         {
             const DataType v_norm =
                 cuVectorOperations<DataType>::euclidean_norm(
                     cublas_handle, v, vector_size);

             // Squared distance between v and the column, expanded as
             // |v|^2 - 2 <column, v> + |column|^2
             const DataType squared_distance =
                 v_norm*v_norm - 2.0*projection + basis_norm*basis_norm;

             // The two vectors coincide. Subtracting would wipe out v, so
             // this column is skipped.
             if (squared_distance < 2.0 * eps * vector_size)
             {
                 continue;
             }
         }

         // Remove the component of v along the selected column
         cuVectorOperations<DataType>::subtract_scaled_vector(
                 cublas_handle, basis, vector_size, scale, v);
     }
 }


 // =====================

 // orthogonalize vectors

 // =====================


 /// \brief Mutually orthogonalizes a set of vectors in place using
 ///        Gram-Schmidt projections.
 ///
 /// The i-th vector is orthogonalized against the vectors that precede it
 /// (within a window of at most \c vector_size vectors). If the i-th vector
 /// becomes numerically zero during this process, it is replaced by a newly
 /// generated random vector (Rademacher distributed, elements +1 or -1) and
 /// the orthogonalization of the i-th vector is restarted. After
 /// 20 consecutive failed trials on the same vector, the program is aborted.
 ///
 /// \param[in]     cublas_handle  cuBLAS handle forwarded to the
 ///                               cuVectorOperations routines.
 /// \param[in,out] vectors        Device array of the vectors, stored
 ///                               contiguously. The i-th vector starts at
 ///                               <tt>vectors[i*vector_size]</tt>.
 /// \param[in]     vector_size    Number of elements of each vector.
 /// \param[in]     num_vectors    Number of vectors. Nothing is done if this
 ///                               is less than two.
 template <typename DataType>
 void cuOrthogonalization<DataType>::orthogonalize_vectors(
         cublasHandle_t cublas_handle,
         DataType* vectors,
         const LongIndexType vector_size,
         const IndexType num_vectors)
 {
     // Do nothing if there is only one vector
     if (num_vectors < 2)
     {
         return;
     }

     IndexType i = 0;
     IndexType j;
     IndexType start_j;
     DataType inner_prod;
     DataType norm_j;
     DataType norm_i;
     DataType epsilon = std::numeric_limits<DataType>::epsilon();
     IndexType success = 1;
     IndexType max_num_trials = 20;
     IndexType num_trials = 0;
     IndexType num_threads = 1;
     RandomNumberGenerator random_number_generator(num_threads);

     // Host-side buffer for a regenerated vector. It is allocated lazily, only
     // if some vector needs to be regenerated.
     DataType* buffer = NULL;

     while (i < num_vectors)
     {
         // Give up if the previous trial failed and the trial budget is spent
         if ((success == 0) && (num_trials >= max_num_trials))
         {
             std::cerr << "ERROR: Cannot orthogonalize vectors after " \
                       << num_trials << " trials. Aborting." \
                       << std::endl;
             abort();
         }

         // Reset on new trial (if it was set to 0 before to start a new trial)
         success = 1;

         // j iterates on previous vectors in a window of at most vector_size.
         // NOTE(review): a window of vector_size mutually orthogonal vectors
         // spans the whole space, so the i-th vector may be reduced to (near)
         // zero and regenerated repeatedly -- confirm the intended window size
         // when num_vectors exceeds vector_size.
         if (static_cast<LongIndexType>(i) > vector_size)
         {
             // When vector_size is smaller than i, it is fine to cast to signed
             start_j = i - static_cast<IndexType>(vector_size);
         }
         else
         {
             start_j = 0;
         }

         // Reorthogonalize the i-th vector against the previous vectors
         for (j=start_j; j < i; ++j)
         {
             // Norm of the j-th vector
             norm_j = cuVectorOperations<DataType>::euclidean_norm(
                     cublas_handle, &vectors[j*vector_size], vector_size);

             // A (numerically) zero j-th vector defines no direction to
             // project on, so it is skipped.
             if (norm_j < epsilon * sqrt(vector_size))
             {
                 std::cerr << "WARNING: norm of the given vector is too small. "
                           << "Cannot reorthogonalize against zero vector. "
                           << "Skipping." << std::endl;
                 continue;
             }

             // Projecting i-th vector to j-th vector
             inner_prod = cuVectorOperations<DataType>::inner_product(
                     cublas_handle, &vectors[i*vector_size],
                     &vectors[j*vector_size], vector_size);

             // Scale of subtraction
             DataType scale = inner_prod / (norm_j * norm_j);

             // Subtract the scaled j-th vector from the i-th vector
             cuVectorOperations<DataType>::subtract_scaled_vector(
                     cublas_handle, &vectors[vector_size*j], vector_size, scale,
                     &vectors[vector_size*i]);

             // Norm of the i-th vector after the subtraction
             norm_i = cuVectorOperations<DataType>::euclidean_norm(
                     cublas_handle, &vectors[i*vector_size], vector_size);

             // If the norm is too small, regenerate the i-th vector randomly
             if (norm_i < epsilon * sqrt(vector_size))
             {
                 // Allocate buffer on the first use
                 if (buffer == NULL)
                 {
                     buffer = new DataType[vector_size];
                 }

                 // Regenerate new random vector on buffer
                 RandomArrayGenerator<DataType>::generate_random_array(
                         random_number_generator, buffer,
                         vector_size, num_threads);

                 // Copy buffer to the i-th vector on device
                 CudaInterface<DataType>::copy_to_device(
                         buffer, vector_size, &vectors[i*vector_size]);

                 // Repeat the reorthogonalization for i-th vector against
                 // all previous vectors again.
                 success = 0;
                 ++num_trials;
                 break;
             }
         }

         if (success == 1)
         {
             ++i;

             // Reset if num_trials was incremented before.
             num_trials = 0;
         }
     }

     // Deallocate buffer
     if (buffer != NULL)
     {
         delete[] buffer;
         buffer = NULL;
     }
 }


 // ===============================

 // Explicit template instantiation

 // ===============================


 // Explicitly instantiate the class template for single and double precision,
 // which are the only two data types instantiated in this file.
 template class cuOrthogonalization<float>;

 template class cuOrthogonalization<double>;

CudaInterface::copy_to_device
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
Definition: cuda_interface.cu:142

RandomArrayGenerator::generate_random_array
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Definition: random_array_generator.cpp:63

RandomNumberGenerator
Generates 64-bit integers on multiple parallel threads.
Definition: random_number_generator.h:104

cuOrthogonalization
A static class for orthogonalization of vector bases. This class acts as a templated namespace,...
Definition: cu_orthogonalization.h:37

cuOrthogonalization::gram_schmidt_process
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
Definition: cu_orthogonalization.cu:128

cuOrthogonalization::orthogonalize_vectors
static void orthogonalize_vectors(cublasHandle_t cublas_handle, DataType *vectors, const LongIndexType vector_size, const IndexType num_vectors)
Orthogonalizes set of vectors mutually using modified Gram-Schmidt process.
Definition: cu_orthogonalization.cu:275

cuVectorOperations::subtract_scaled_vector
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Subtracts the scaled input vector from the output vector.
Definition: cu_vector_operations.cu:126

cuVectorOperations::inner_product
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *vector1, const DataType *vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
Definition: cu_vector_operations.cu:166

cuVectorOperations::euclidean_norm
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
Definition: cu_vector_operations.cu:201

cu_orthogonalization.h

LongIndexType
int LongIndexType
Definition: types.h:60

FlagType
int FlagType
Definition: types.h:68

IndexType
int IndexType
Definition: types.h:65