17#include "../_cu_definitions/cu_types.h"
23#include "../_cu_basic_algebra/cu_vector_operations.h"
24#include "../_random_generator/random_array_generator.h"
25#include "../_random_generator/random_number_generator.h"
26#include "../_cuda_utilities/cuda_api.h"
27#include "../_cu_arithmetics/cu_arithmetics.h"
130template <
typename DataType>
132 cublasHandle_t cublas_handle,
142 if ((num_ortho == 0) || (num_vectors < 2))
147 else if ((num_ortho < 0) ||
148 (num_ortho >
static_cast<FlagType>(num_vectors)))
151 num_steps = num_vectors;
156 num_steps = num_ortho;
162 if (num_steps >
static_cast<IndexType>(vector_size))
164 num_steps = vector_size;
175 for (
IndexType step=0; step < num_steps; ++step)
178 if ((last_vector % num_vectors) >= step)
180 i = (last_vector % num_vectors) - step;
185 i = (last_vector % num_vectors) - step + num_vectors;
190 cublas_handle, &V[vector_size*i], vector_size);
197 std::sqrt(vector_size))
201 std::cerr <<
"WARNING: norm of the given vector is too small. " \
202 <<
"Cannot orthogonalize against zero vector. " \
203 <<
"Skipping." << std::endl;
209 cublas_handle, &V[vector_size*i], v, vector_size);
226 cublas_handle, v, vector_size);
243 static_cast<unsigned long long int>(vector_size))
253 cublas_handle, &V[vector_size*i], vector_size, scale, v);
301template <
typename DataType>
303 cublasHandle_t cublas_handle,
326 DataType* buffer = NULL;
328 while (i < num_vectors)
330 if ((success == 0) && (num_trials >= max_num_trials))
332 std::cerr <<
"ERROR: Cannot orthogonalize vectors after " \
333 << num_trials <<
" trials. Aborting." \
345 start_j = i -
static_cast<IndexType>(vector_size);
353 for (j=start_j; j < i; ++j)
357 cublas_handle, &vectors[j*vector_size], vector_size);
364 std::sqrt(vector_size))
368 std::cerr <<
"WARNING: norm of the given vector is too " \
369 <<
" small. Cannot re-orthogonalize against zero" \
370 <<
"vector. Skipping."
377 cublas_handle, &vectors[i*vector_size],
378 &vectors[j*vector_size], vector_size);
387 cublas_handle, &vectors[vector_size*j], vector_size, scale,
388 &vectors[vector_size*i]);
392 cublas_handle, &vectors[i*vector_size], vector_size);
399 std::sqrt(vector_size))
406 buffer =
new DataType[vector_size];
411 random_number_generator, buffer,
412 vector_size, num_threads);
416 buffer, vector_size, &vectors[i*vector_size]);
448#if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
452#if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
456#if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
460#if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
464#if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
468#if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
static void copy_to_device(const ArrayType *host_array, const size_t array_size, ArrayType *device_array)
Copies memory on host to device memory.
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Generates 64-bit integers on multiple parallel threads.
A static class for orthogonalization of vector bases. This class acts as a templated namespace,...
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
static void orthogonalize_vectors(cublasHandle_t cublas_handle, DataType *vectors, const LongIndexType vector_size, const IndexType num_vectors)
Orthogonalizes set of vectors mutually using modified Gram-Schmidt process.
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *RESTRICT input_vector, const LongIndexType vector_size, const DataType scale, DataType *RESTRICT output_vector)
Subtracts the scaled input vector from the output vector.
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *RESTRICT vector1, const DataType *RESTRICT vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *RESTRICT vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
__host__ __device__ DataType mul(const DataType x, const DataType y)
Multiply two floating point numbers in round-to-nearest-even mode.
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.
__host__ __device__ DataType sub(const DataType x, const DataType y)
Subtract two floating point numbers in round-to-nearest-even mode.
__host__ __device__ DataType div(const DataType x, const DataType y)
Divide two floating point numbers in round-to-nearest-even mode.