22 #include "../_random_generator/random_array_generator.h"
23 #include "../_c_trace_estimator/diagonalization.h"
24 #include "../_c_trace_estimator/convergence_tools.h"
25 #include "../_cuda_utilities/cuda_timer.h"
26 #include "../_cuda_utilities/cuda_interface.h"
196 template <
typename DataType>
199 DataType* parameters,
203 const DataType exponent,
207 const DataType lanczos_tol,
210 const DataType error_atol,
211 const DataType error_rtol,
212 const DataType confidence_level,
213 const DataType outlier_significance_level,
223 float& alg_wall_time)
229 omp_set_num_threads(num_gpu_devices);
236 IndexType random_vectors_size = matrix_size * num_gpu_devices;
237 DataType* random_vectors =
new DataType[random_vectors_size];
252 unsigned int chunk_size =
static_cast<int>(
253 sqrt(
static_cast<DataType
>(max_num_samples) / num_gpu_devices));
265 #pragma omp parallel for schedule(dynamic, chunk_size)
266 for (i=0; i < max_num_samples; ++i)
268 if (!
static_cast<bool>(all_converged))
271 unsigned int thread_id = omp_get_thread_num();
276 A, parameters, num_inquiries, matrix_function, gram,
277 exponent, orthogonalize, lanczos_degree, lanczos_tol,
278 random_number_generator,
279 &random_vectors[matrix_size*thread_id], converged,
286 processed_samples_indices[num_processed_samples] = i;
287 ++num_processed_samples;
293 samples, min_num_samples, num_inquiries,
294 processed_samples_indices, num_processed_samples,
295 confidence_level, error_atol, error_rtol, error,
296 num_samples_used, converged);
303 alg_wall_time = cuda_timer.
elapsed();
307 confidence_level, outlier_significance_level, num_inquiries,
308 max_num_samples, num_samples_used, processed_samples_indices,
309 samples, num_outliers, trace, error);
312 delete[] random_vectors;
314 return all_converged;
418 template <
typename DataType>
421 DataType* parameters,
425 const DataType exponent,
428 const DataType lanczos_tol,
430 DataType* random_vector,
432 DataType* trace_estimate)
443 random_number_generator, random_vector, matrix_size, num_threads);
446 DataType* alpha =
new DataType[lanczos_degree];
447 DataType* beta =
new DataType[lanczos_degree];
451 DataType* eigenvectors = NULL;
452 DataType* left_singularvectors = NULL;
453 DataType* right_singularvectors_transposed = NULL;
456 IndexType required_num_inquiries = num_inquiries;
464 required_num_inquiries = 1;
470 DataType** theta =
new DataType*[num_inquiries];
471 for (j=0; j < num_inquiries; ++j)
473 theta[j] =
new DataType[lanczos_degree];
476 for (i=0; i < lanczos_degree; ++i)
483 DataType** tau =
new DataType*[num_inquiries];
484 for (j=0; j < num_inquiries; ++j)
486 tau[j] =
new DataType[lanczos_degree];
489 for (i=0; i < lanczos_degree; ++i)
505 for (j=0; j < required_num_inquiries; ++j)
511 if ((converged[j] == 1) && (required_num_inquiries == num_inquiries))
523 A, random_vector, matrix_size, lanczos_degree, lanczos_tol,
524 orthogonalize, alpha, beta);
527 left_singularvectors = \
528 new DataType[lanczos_size[j] * lanczos_size[j]];
529 right_singularvectors_transposed = \
530 new DataType[lanczos_size[j] * lanczos_size[j]];
534 alpha, beta, left_singularvectors,
535 right_singularvectors_transposed, lanczos_size[j]);
538 for (i=0; i < lanczos_size[j]; ++i)
540 theta[j][i] = alpha[i] * alpha[i];
541 tau[j][i] = right_singularvectors_transposed[i];
548 A, random_vector, matrix_size, lanczos_degree, lanczos_tol,
549 orthogonalize, alpha, beta);
552 eigenvectors =
new DataType[lanczos_size[j] * lanczos_size[j]];
556 alpha, beta, eigenvectors, lanczos_size[j]);
559 for (i=0; i < lanczos_size[j]; ++i)
561 theta[j][i] = alpha[i];
562 tau[j][i] = eigenvectors[i * lanczos_size[j]];
579 for (j=0; j < num_inquiries; ++j)
585 for (j=1; j < num_inquiries; ++j)
588 lanczos_size[j] = lanczos_size[0];
590 for (i=0; i < lanczos_size[j]; ++i)
596 ¶meters[j*num_parameters]);
599 tau[j][i] = tau[0][i];
605 DataType quadrature_sum;
606 for (j=0; j < num_inquiries; ++j)
609 if (converged[j] == 1)
615 quadrature_sum = 0.0;
622 for (i=0; i < lanczos_size[j]; ++i)
624 quadrature_sum += tau[j][i] * tau[j][i] * \
625 matrix_function->function(pow(theta[j][i], exponent));
628 trace_estimate[j] = matrix_size * quadrature_sum;
634 delete[] lanczos_size;
636 for (j=0; j < required_num_inquiries; ++j)
642 for (j=0; j < required_num_inquiries; ++j)
648 if (eigenvectors != NULL)
650 delete[] eigenvectors;
653 if (left_singularvectors != NULL)
655 delete[] left_singularvectors;
658 if (right_singularvectors_transposed != NULL)
660 delete[] right_singularvectors_transposed;
static void set_device(int device_id)
Sets the current device in multi-gpu applications.
Records elasped time between two CUDA events.
void stop()
Stops the timer.
void start()
Starts the timer.
float elapsed() const
Returns the elapsed time in seconds.
static int eigh_tridiagonal(DataType *diagonals, DataType *subdiagonals, DataType *eigenvectors, IndexType matrix_size)
Computes all eigenvalues and eigenvectors of a real and symmetric tridiagonal matrix.
static int svd_bidiagonal(DataType *diagonals, DataType *subdiagonals, DataType *U, DataType *Vt, IndexType matrix_size)
Computes all singular-values and left and right eigenvectors of a real and symmetric upper bi-diagona...
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Generates 64-bit integers on multiple parallel threads.
virtual DataType get_eigenvalue(const DataType *known_parameters, const DataType known_eigenvalue, const DataType *inquiry_parameters) const =0
void set_parameters(DataType *parameters_)
Sets the scalar parameter this->parameters. Parameter is initialized to NULL. However,...
LongIndexType get_num_rows() const
FlagType is_eigenvalue_relation_known() const
Returns a flag that determines whether a relation between the parameters of the operator and its eige...
IndexType get_num_parameters() const
Base class for linear operators. This class serves as interface for all derived classes.
A static class to compute the trace of implicit matrix functions using stochastic Lanczos quadrature ...
static void _cu_stochastic_lanczos_quadrature(cuLinearOperator< DataType > *A, DataType *parameters, const IndexType num_inquiries, const Function *matrix_function, const FlagType gram, const DataType exponent, const FlagType orthogonalize, const IndexType lanczos_degree, const DataType lanczos_tol, RandomNumberGenerator &random_number_generator, DataType *random_vector, FlagType *converged, DataType *trace_estimate)
For a given random input vector, computes one Monte-Carlo sample to estimate trace using Lanczos quad...
static FlagType cu_trace_estimator(cuLinearOperator< DataType > *A, DataType *parameters, const IndexType num_inquiries, const Function *matrix_function, const FlagType gram, const DataType exponent, const FlagType orthogonalize, const int64_t seed, const IndexType lanczos_degree, const DataType lanczos_tol, const IndexType min_num_samples, const IndexType max_num_samples, const DataType error_atol, const DataType error_rtol, const DataType confidence_level, const DataType outlier_significance_level, const IndexType num_threads, const IndexType num_gpu_devices, DataType *trace, DataType *error, DataType **samples, IndexType *processed_samples_indices, IndexType *num_samples_used, IndexType *num_outliers, FlagType *converged, float &alg_wall_time)
Stochastic Lanczos quadrature method to estimate trace of a function of a linear operator....
IndexType cu_golub_kahn_bidiagonalization(cuLinearOperator< DataType > *A, const DataType *v, const LongIndexType n, const IndexType m, const DataType lanczos_tol, const FlagType orthogonalize, DataType *alpha, DataType *beta)
Bi-diagonalizes the positive-definite matrix A using Golub-Kahn-Lanczos method.
IndexType cu_lanczos_tridiagonalization(cuLinearOperator< DataType > *A, const DataType *v, const LongIndexType n, const IndexType m, const DataType lanczos_tol, const FlagType orthogonalize, DataType *alpha, DataType *beta)
Tri-diagonalizes matrix A to T using the start vector v. is the Lanczos degree, which will be the siz...