imate
C++/CUDA Reference
cu_orthogonalization.cu
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Imports
14 // =======
15 
16 #include "./cu_orthogonalization.h"
17 #include <cstdlib> // abort, NULL
18 #include <iostream> // std::cerr, std::endl
19 #include <cmath> // sqrt, std::fabs
20 #include <limits> // std::numeric_limits
21 #include "../_cu_basic_algebra/cu_vector_operations.h" // cuVectorOperations
22 #include "../_random_generator/random_array_generator.h" // RandomArrayGene...
23 #include "../_random_generator/random_number_generator.h" // RandomNumberGe...
24 #include "../_cuda_utilities/cuda_interface.h" // CudaInterface
25 
26 
27 // ====================
28 // gram schmidt process
29 // ====================
30 
126 
127 template <typename DataType>
129  cublasHandle_t cublas_handle,
130  const DataType* V,
131  const LongIndexType vector_size,
132  const IndexType num_vectors,
133  const IndexType last_vector,
134  const FlagType num_ortho,
135  DataType* v)
136 {
137  // Determine how many previous vectors to orthogonalize against
138  IndexType num_steps;
139  if ((num_ortho == 0) || (num_vectors < 2))
140  {
141  // No orthogonalization is performed
142  return;
143  }
144  else if ((num_ortho < 0) ||
145  (num_ortho > static_cast<FlagType>(num_vectors)))
146  {
147  // Orthogonalize against all vectors
148  num_steps = num_vectors;
149  }
150  else
151  {
152  // Orthogonalize against only the last num_ortho vectors
153  num_steps = num_ortho;
154  }
155 
156  // Vectors can be orthogonalized at most to the full basis of the vector
157  // space. Thus, num_steps cannot be larger than the dimension of vector
158  // space, which is vector_size.
159  if (num_steps > static_cast<IndexType>(vector_size))
160  {
161  num_steps = vector_size;
162  }
163 
164  IndexType i;
165  DataType inner_prod;
166  DataType norm;
167  DataType norm_v;
168  DataType epsilon = std::numeric_limits<DataType>::epsilon();
169  DataType distance2;
170 
171  // Iterate over vectors
172  for (IndexType step=0; step < num_steps; ++step)
173  {
174  // i is the index of a column vector in V to orthogonalize v against it
175  if ((last_vector % num_vectors) >= step)
176  {
177  i = (last_vector % num_vectors) - step;
178  }
179  else
180  {
181  // Wrap around negative indices from the end of column index
182  i = (last_vector % num_vectors) - step + num_vectors;
183  }
184 
185  // Norm of j-th vector
187  cublas_handle, &V[vector_size*i], vector_size);
188 
189  // Check norm
190  if (norm < epsilon * sqrt(vector_size))
191  {
192  std::cerr << "WARNING: norm of the given vector is too small. " \
193  << "Cannot orthogonalize against zero vector. " \
194  << "Skipping." << std::endl;
195  continue;
196  }
197 
198  // Projection
200  cublas_handle, &V[vector_size*i], v, vector_size);
201 
202  // scale for subtraction
203  DataType scale = inner_prod / (norm * norm);
204 
205  // If scale is is 1, it is possible that vector v and j-th vector are
206  // identical (or close).
207  if (std::abs(scale - 1.0) <= 2.0 * epsilon)
208  {
209  // Norm of the vector v
211  cublas_handle, v, vector_size);
212 
213  // Compute distance between the j-th vector and vector v
214  distance2 = norm_v*norm_v - 2.0*inner_prod + norm*norm;
215 
216  // If distance is zero, do not reorthogonalize i-th against
217  // the j-th vector.
218  if (distance2 < 2.0 * epsilon * vector_size)
219  {
220  continue;
221  }
222  }
223 
224  // Subtraction
226  cublas_handle, &V[vector_size*i], vector_size, scale, v);
227  }
228 }
229 
230 
231 // =====================
232 // orthogonalize vectors
233 // =====================
234 
273 
274 template <typename DataType>
276  cublasHandle_t cublas_handle,
277  DataType* vectors,
278  const LongIndexType vector_size,
279  const IndexType num_vectors)
280 {
281  // Do nothing if there is only one vector
282  if (num_vectors < 2)
283  {
284  return;
285  }
286 
287  IndexType i = 0;
288  IndexType j;
289  IndexType start_j;
290  DataType inner_prod;
291  DataType norm_j;
292  DataType norm_i;
293  DataType epsilon = std::numeric_limits<DataType>::epsilon();
294  IndexType success = 1;
295  IndexType max_num_trials = 20;
296  IndexType num_trials = 0;
297  IndexType num_threads = 1;
298  RandomNumberGenerator random_number_generator(num_threads);
299  DataType* buffer = NULL;
300 
301  while (i < num_vectors)
302  {
303  if ((success == 0) && (num_trials >= max_num_trials))
304  {
305  std::cerr << "ERROR: Cannot orthogonalize vectors after " \
306  << num_trials << " trials. Aborting." \
307  << std::endl;
308  abort();
309  }
310 
311  // Reset on new trial (if it was set to 0 before to start a new trial)
312  success = 1;
313 
314  // j iterates on previous vectors in a window of at most vector_size
315  if (static_cast<LongIndexType>(i) > vector_size)
316  {
317  // When vector_size is smaller than i, it is fine to cast to signed
318  start_j = i - static_cast<IndexType>(vector_size);
319  }
320  else
321  {
322  start_j = 0;
323  }
324 
325  // Reorthogonalize against previous vectors
326  for (j=start_j; j < i; ++j)
327  {
328  // Norm of the j-th vector
330  cublas_handle, &vectors[j*vector_size], vector_size);
331 
332  // Check norm
333  if (norm_j < epsilon * sqrt(vector_size))
334  {
335  std::cerr << "WARNING: norm of the given vector is too " \
336  << " small. Cannot reorthogonalize against zero" \
337  << "vector. Skipping."
338  << std::endl;
339  continue;
340  }
341 
342  // Projecting i-th vector to j-th vector
344  cublas_handle, &vectors[i*vector_size],
345  &vectors[j*vector_size], vector_size);
346 
347  // Scale of subtraction
348  DataType scale = inner_prod / (norm_j * norm_j);
349 
350  // Subtraction
352  cublas_handle, &vectors[vector_size*j], vector_size, scale,
353  &vectors[vector_size*i]);
354 
355  // Norm of the i-th vector
357  cublas_handle, &vectors[i*vector_size], vector_size);
358 
359  // If the norm is too small, regenerate the i-th vector randomly
360  if (norm_i < epsilon * sqrt(vector_size))
361  {
362  // Allocate buffer
363  if (buffer == NULL)
364  {
365  buffer = new DataType[vector_size];
366  }
367 
368  // Regenerate new random vector on buffer
370  random_number_generator, buffer,
371  vector_size, num_threads);
372 
373  // Copy buffer to the i-th vector on device
375  buffer, vector_size, &vectors[i*vector_size]);
376 
377  // Repeat the reorthogonalization for i-th vector against
378  // all previous vectors again.
379  success = 0;
380  ++num_trials;
381  break;
382  }
383  }
384 
385  if (success == 1)
386  {
387  ++i;
388 
389  // Reset if num_trials was incremented before.
390  num_trials = 0;
391  }
392  }
393 
394  // Deallocate buffer
395  if (buffer != NULL)
396  {
397  delete[] buffer;
398  buffer = NULL;
399  }
400 }
401 
402 
403 // ===============================
404 // Explicit template instantiation
405 // ===============================
406 
407 template class cuOrthogonalization<float>;
408 template class cuOrthogonalization<double>;
static void copy_to_device(const ArrayType *host_array, const LongIndexType array_size, ArrayType *device_array)
Copies memory on host to device memory.
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Generates 64-bit integers on multiple parallel threads.
A static class for orthogonalization of vector bases. This class acts as a templated namespace,...
static void gram_schmidt_process(cublasHandle_t cublas_handle, const DataType *V, const LongIndexType vector_size, const IndexType num_vectors, const IndexType last_vector, const FlagType num_ortho, DataType *r)
Modified Gram-Schmidt orthogonalization process to orthogonalize the vector v against a subset of the...
static void orthogonalize_vectors(cublasHandle_t cublas_handle, DataType *vectors, const LongIndexType vector_size, const IndexType num_vectors)
Orthogonalizes set of vectors mutually using modified Gram-Schmidt process.
static void subtract_scaled_vector(cublasHandle_t cublas_handle, const DataType *input_vector, const LongIndexType vector_size, const DataType scale, DataType *output_vector)
Subtracts the scaled input vector from the output vector.
static DataType inner_product(cublasHandle_t cublas_handle, const DataType *vector1, const DataType *vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
static DataType euclidean_norm(cublasHandle_t cublas_handle, const DataType *vector, const LongIndexType vector_size)
Computes the Euclidean 2-norm of a 1D array.
int LongIndexType
Definition: types.h:60
int FlagType
Definition: types.h:68
int IndexType
Definition: types.h:65