doxygen/html/convergence__tools_8cpp_source.html

 /*

  *  SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>

  *  SPDX-License-Identifier: BSD-3-Clause

  *  SPDX-FileType: SOURCE

  *

  *  This program is free software: you can redistribute it and/or modify it

  *  under the terms of the license found in the LICENSE.txt file in the root

  *  directory of this source tree.

  */


 // =======

 // Headers

 // =======


 #include "./convergence_tools.h"

 #include <algorithm>  // std::max

 #include <cmath>  // sqrt, std::abs, INFINITY, NAN, isnan

 #include <vector>  // std::vector

 #include "./special_functions.h"  // erf_inv


 // =================

 // check convergence

 // =================


 /// \brief      Tests, for every inquiry (column of \c samples), whether the
 ///             error of the mean of the processed samples is within
 ///             tolerance.
 ///
 /// \details    For each column \c j whose \c converged[j] is still zero, the
 ///             mean and the sample standard deviation (n-1 denominator) of
 ///             the processed rows are computed and the error is set to
 ///             <tt>z * std / sqrt(n)</tt>, where
 ///             <tt>z = sqrt(2) * erf_inv(confidence_level)</tt> and \c n is
 ///             \c num_processed_samples. The column is flagged as converged
 ///             when that error is below
 ///             <tt>max(error_atol, error_rtol * |mean|)</tt>. The magnitude
 ///             of the mean is used so that a negative mean still yields a
 ///             usable relative tolerance.
 ///
 ///             Columns that are already flagged are skipped; their entries in
 ///             \c error and \c num_samples_used are left untouched.
 ///
 ///             If fewer than \c min_num_samples rows were processed, no error
 ///             is computed: every \c error[j] is set to infinity, every
 ///             \c converged[j] is reset to zero, and every
 ///             \c num_samples_used[j] is set to \c num_processed_samples.
 ///
 /// \param[in]      samples
 ///                 2D array indexed as <tt>samples[row][column]</tt>.
 /// \param[in]      min_num_samples
 ///                 Minimum number of processed rows before convergence is
 ///                 evaluated at all.
 /// \param[in]      num_inquiries
 ///                 Number of columns of \c samples.
 /// \param[in]      processed_samples_indices
 ///                 Row indices into \c samples; the first
 ///                 \c num_processed_samples entries are read.
 /// \param[in]      num_processed_samples
 ///                 Number of rows processed so far.
 /// \param[in]      confidence_level
 ///                 Argument passed to \c erf_inv to obtain the z-score.
 /// \param[in]      error_atol
 ///                 Absolute error tolerance.
 /// \param[in]      error_rtol
 ///                 Error tolerance relative to the magnitude of the mean.
 /// \param[out]     error
 ///                 Array of size \c num_inquiries receiving the error of each
 ///                 column that was evaluated.
 /// \param[out]     num_samples_used
 ///                 Array of size \c num_inquiries receiving the number of
 ///                 rows used for each column that was evaluated.
 /// \param[in,out]  converged
 ///                 Array of size \c num_inquiries. Zero means not converged.
 ///                 Entries are set to 1 for columns that converge in this
 ///                 call.
 ///
 /// \return     1 if no entry of \c converged is zero after this call,
 ///             otherwise 0.
 template <typename DataType>
 FlagType ConvergenceTools<DataType>::check_convergence(
         DataType** samples,
         const IndexType min_num_samples,
         const IndexType num_inquiries,
         const IndexType* processed_samples_indices,
         const IndexType num_processed_samples,
         const DataType confidence_level,
         const DataType error_atol,
         const DataType error_rtol,
         DataType* error,
         IndexType* num_samples_used,
         FlagType* converged)
 {
     IndexType j;

     // If the number of processed samples is not enough, report "not
     // converged yet". This is essential since in the first few iterations
     // the standard deviation of the cumulative averages is still too small.
     if (num_processed_samples < min_num_samples)
     {
         // Skip computing error. Fill outputs with trivial initial values
         for (j=0; j < num_inquiries; ++j)
         {
             error[j] = INFINITY;
             converged[j] = 0;
             num_samples_used[j] = num_processed_samples;
         }

         return 0;
     }

     IndexType i;
     DataType summand;
     DataType mean;
     DataType std_dev;
     DataType data;
     DataType tolerance;

     // Quantile of normal distribution (usually known as the "z" coefficient)
     const DataType standard_z_score = std::sqrt(2.0) *
         static_cast<DataType>(erf_inv(static_cast<double>(confidence_level)));

     // For each column of samples, compute error of all processed rows
     for (j=0; j < num_inquiries; ++j)
     {
         // Do not check convergence if j-th column already converged
         if (converged[j] != 0)
         {
             continue;
         }

         // Mean of j-th column using all processed rows of j-th column
         summand = 0.0;
         for (i=0; i < num_processed_samples; ++i)
         {
             summand += samples[processed_samples_indices[i]][j];
         }
         mean = summand / num_processed_samples;

         // Sample standard deviation of j-th column. It is undefined for a
         // single sample, in which case the error is made infinite.
         if (num_processed_samples > 1)
         {
             summand = 0.0;
             for (i=0; i < num_processed_samples; ++i)
             {
                 data = samples[processed_samples_indices[i]][j];
                 summand += (data - mean) * (data - mean);
             }
             std_dev = std::sqrt(summand / (num_processed_samples - 1.0));
         }
         else
         {
             std_dev = INFINITY;
         }

         // Compute error based on the standard deviation and confidence level
         error[j] = standard_z_score * std_dev /
             std::sqrt(static_cast<double>(num_processed_samples));

         // The relative tolerance scales with the magnitude of the mean, so
         // that it remains meaningful when the mean is negative.
         tolerance = std::max(error_atol, error_rtol * std::abs(mean));

         // Check error with atol and rtol to find if j-th column converged
         if (error[j] < tolerance)
         {
             converged[j] = 1;
         }

         // Update how many samples used so far to average j-th column
         num_samples_used[j] = num_processed_samples;
     }

     // Convergence is reached only if all columns (all inquiries) converged
     FlagType all_converged = 1;
     for (j=0; j < num_inquiries; ++j)
     {
         if (converged[j] == 0)
         {
             // The j-th column has not converged.
             all_converged = 0;
             break;
         }
     }

     return all_converged;
 }


 // =================

 // average estimates

 // =================


 /// \brief      Averages each column of \c samples, discards outliers, and
 ///             reports the outlier-free mean together with its error.
 ///
 /// \details    For each column \c j, the first \c num_samples_used[j] rows
 ///             listed in \c processed_samples_indices are used. A first pass
 ///             computes their mean and sample standard deviation (n-1
 ///             denominator). A sample is an outlier when its distance from
 ///             that mean exceeds <tt>z_out * std</tt>, where
 ///             <tt>z_out = sqrt(2) * erf_inv(1 - outlier_significance_level)
 ///             </tt>. The mean and standard deviation are then recomputed
 ///             without the outliers, and the error is
 ///             <tt>z * std / sqrt(n_kept)</tt> with
 ///             <tt>z = sqrt(2) * erf_inv(confidence_level)</tt>.
 ///
 ///             Whenever fewer than two samples are available for a standard
 ///             deviation, it is taken as infinity.
 ///
 /// \param[in]  confidence_level
 ///             Argument passed to \c erf_inv to obtain the error z-score.
 /// \param[in]  outlier_significance_level
 ///             Its complement to one is passed to \c erf_inv to obtain the
 ///             outlier z-score.
 /// \param[in]  num_inquiries
 ///             Number of columns of \c samples.
 /// \param[in]  max_num_samples
 ///             Size of the internal outlier-flag buffer.
 ///             NOTE(review): assumes <tt>num_samples_used[j] <=
 ///             max_num_samples</tt> for every \c j; this is not checked here.
 /// \param[in]  num_samples_used
 ///             Array of size \c num_inquiries; number of rows used for each
 ///             column.
 /// \param[in]  processed_samples_indices
 ///             Row indices into \c samples.
 /// \param[in]  samples
 ///             2D array indexed as <tt>samples[row][column]</tt>.
 /// \param[out] num_outliers
 ///             Array of size \c num_inquiries receiving the number of
 ///             outliers found in each column.
 /// \param[out] trace
 ///             Array of size \c num_inquiries receiving the mean of each
 ///             column with outliers excluded.
 /// \param[out] error
 ///             Array of size \c num_inquiries receiving the error of each
 ///             column with outliers excluded.
 template <typename DataType>
 void ConvergenceTools<DataType>::average_estimates(
         const DataType confidence_level,
         const DataType outlier_significance_level,
         const IndexType num_inquiries,
         const IndexType max_num_samples,
         const IndexType* num_samples_used,
         const IndexType* processed_samples_indices,
         DataType** samples,
         IndexType* num_outliers,
         DataType* trace,
         DataType* error)
 {
     IndexType i;
     IndexType j;
     IndexType num_used;
     IndexType num_kept;
     DataType summand;
     DataType mean;
     DataType std_dev;
     DataType mean_discrepancy;
     DataType outlier_half_interval;

     // Flags which samples are outliers (1) or not (0). The buffer is owned
     // by the vector and reused for every column.
     std::vector<FlagType> outlier_flags(max_num_samples, 0);

     // Quantile of normal distribution (usually known as the "z" coefficient)
     const DataType error_z_score = std::sqrt(2.0) * erf_inv(confidence_level);

     // Confidence level of outlier is the complement of significance level
     const DataType outlier_confidence_level =
         1.0 - outlier_significance_level;

     // Quantile of the normal distribution inside which a sample is not
     // considered to be an outlier
     const DataType outlier_z_score =
         std::sqrt(2.0) * erf_inv(outlier_confidence_level);

     for (j=0; j < num_inquiries; ++j)
     {
         num_used = num_samples_used[j];
         num_outliers[j] = 0;

         // Compute mean of the j-th column
         summand = 0.0;
         for (i=0; i < num_used; ++i)
         {
             summand += samples[processed_samples_indices[i]][j];
         }
         mean = summand / num_used;

         // Compute sample standard deviation of the j-th column
         if (num_used > 1)
         {
             summand = 0.0;
             for (i=0; i < num_used; ++i)
             {
                 mean_discrepancy =
                     samples[processed_samples_indices[i]][j] - mean;
                 summand += mean_discrepancy * mean_discrepancy;
             }
             std_dev = std::sqrt(summand / (num_used - 1.0));
         }
         else
         {
             std_dev = INFINITY;
         }

         // Outlier half interval
         outlier_half_interval = outlier_z_score * std_dev;

         // Flag each sample by its distance from the mean. The flag is
         // written for every used sample, so no separate reset of the buffer
         // is needed between columns.
         for (i=0; i < num_used; ++i)
         {
             mean_discrepancy = samples[processed_samples_indices[i]][j] - mean;
             if (std::abs(mean_discrepancy) > outlier_half_interval)
             {
                 // Outlier detected
                 outlier_flags[i] = 1;
                 num_outliers[j] += 1;
             }
             else
             {
                 outlier_flags[i] = 0;
             }
         }

         // Number of samples that remain after leaving out the outliers
         num_kept = num_used - num_outliers[j];

         // Reevaluate mean but leave out outliers
         summand = 0.0;
         for (i=0; i < num_used; ++i)
         {
             if (outlier_flags[i] == 0)
             {
                 summand += samples[processed_samples_indices[i]][j];
             }
         }
         mean = summand / num_kept;

         // Reevaluate standard deviation but leave out outliers
         if (num_kept > 1)
         {
             summand = 0.0;
             for (i=0; i < num_used; ++i)
             {
                 if (outlier_flags[i] == 0)
                 {
                     mean_discrepancy =
                         samples[processed_samples_indices[i]][j] - mean;
                     summand += mean_discrepancy * mean_discrepancy;
                 }
             }
             std_dev = std::sqrt(summand / (num_kept - 1.0));
         }
         else
         {
             std_dev = INFINITY;
         }

         // Trace and its error
         trace[j] = mean;
         error[j] = error_z_score * std_dev /
             std::sqrt(static_cast<double>(num_kept));
     }
 }


 // ===============================

 // Explicit template instantiation

 // ===============================


 // The member functions of ConvergenceTools are defined in this source file,
 // so the class template is instantiated explicitly here for each of the
 // floating-point types listed below (float, double, and long double).
 template class ConvergenceTools<float>;

 template class ConvergenceTools<double>;

 template class ConvergenceTools<long double>;

ConvergenceTools
A static class to compute the trace of implicit matrix functions using stochastic Lanczos quadrature ...
Definition: convergence_tools.h:37

ConvergenceTools::check_convergence
static FlagType check_convergence(DataType **samples, const IndexType min_num_samples, const IndexType num_inquiries, const IndexType *processed_samples_indices, const IndexType num_processed_samples, const DataType confidence_level, const DataType error_atol, const DataType error_rtol, DataType *error, IndexType *num_samples_used, FlagType *converged)
Checks if the standard deviation of the set of the cumulative averages of trace estimators converged ...
Definition: convergence_tools.cpp:95

ConvergenceTools::average_estimates
static void average_estimates(const DataType confidence_level, const DataType outlier_significance_level, const IndexType num_inquiries, const IndexType max_num_samples, const IndexType *num_samples_used, const IndexType *processed_samples_indices, DataType **samples, IndexType *num_outliers, DataType *trace, DataType *error)
Averages the estimates of the trace. Removes outliers and reevaluates the error to account for ...
Definition: convergence_tools.cpp:256

convergence_tools.h

erf_inv
double erf_inv(const double x)
Inverse error function.
Definition: special_functions.cpp:63

special_functions.h

FlagType
int FlagType
Definition: types.h:68

IndexType
int IndexType
Definition: types.h:65