18#include "../_c_arithmetics/c_arithmetics.h"
19#include "../_definitions/definitions.h"
22#if defined(USE_OPENMP) && (USE_OPENMP == 1)
26#if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
44template <
typename DataType>
46 const DataType*
RESTRICT input_vector,
50 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
56 cblas_api::xcopy(vector_size, input_vector, incx, output_vector, incy);
61 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
62 #pragma omp parallel for \
64 if ((!omp_in_parallel()) && (vector_size >= LARGE_ARRAY_SIZE)) \
66 shared(input_vector, output_vector, vector_size)
70 output_vector[i] = input_vector[i];
92template <
typename DataType>
94 const DataType*
RESTRICT input_vector,
99 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
105 cblas_api::xcopy(vector_size, input_vector, incx, output_vector, incy);
107 cblas_api::xscal(vector_size, scale, output_vector, incy);
112 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
113 #pragma omp parallel for \
115 if ((!omp_in_parallel()) && (vector_size >= LARGE_ARRAY_SIZE)) \
117 shared(input_vector, output_vector, vector_size, scale)
121 output_vector[i] = scale * input_vector[i];
152template <
typename DataType>
154 const DataType*
RESTRICT input_vector,
156 const DataType scale,
160 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
166 DataType neg_scale = -scale;
167 cblas_api::xaxpy(vector_size, neg_scale, input_vector, incx, output_vector,
179 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
180 #pragma omp parallel for \
182 if ((!omp_in_parallel()) && (vector_size >= LARGE_ARRAY_SIZE)) \
184 shared(input_vector, output_vector, vector_size, scale)
188 output_vector[i] -= scale * input_vector[i];
229template <
typename DataType>
235 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
241 DataType inner_prod = cblas_api::xdot(vector_size, vector1, incx, vector2,
249 long double inner_prod = 0.0;
250 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
252 LongIndexType vector_size_chunked = vector_size - (vector_size % chunk);
255 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
256 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
257 #pragma omp parallel for \
259 if ((!omp_in_parallel()) && \
260 (vector_size_chunked >= LARGE_ARRAY_SIZE)) \
262 shared(vector1, vector2, vector_size, vector_size_chunked, chunk) \
263 reduction(+: inner_prod)
267 inner_prod += vector1[i] * vector2[i] +
268 vector1[i+1] * vector2[i+1] +
269 vector1[i+2] * vector2[i+2] +
270 vector1[i+3] * vector2[i+3] +
271 vector1[i+4] * vector2[i+4];
275 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
276 for (
LongIndexType i=vector_size_chunked; i < vector_size; ++i)
279 #pragma omp parallel
for \
283 shared(vector1, vector2, vector_size) \
284 reduction(+: inner_prod)
289 inner_prod += vector1[i] * vector2[i];
292 return static_cast<DataType
>(inner_prod);
331template <
typename DataType>
336 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
341 DataType norm = cblas_api::xnrm2(vector_size, vector, incx);
348 long double norm2 = 0.0;
349 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
351 LongIndexType vector_size_chunked = vector_size - (vector_size % chunk);
354 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
355 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
356 #pragma omp parallel for \
358 if ((!omp_in_parallel()) && \
359 (vector_size_chunked >= LARGE_ARRAY_SIZE)) \
361 shared(vector, vector_size, vector_size_chunked, chunk) \
366 norm2 += vector[i] * vector[i] +
367 vector[i+1] * vector[i+1] +
368 vector[i+2] * vector[i+2] +
369 vector[i+3] * vector[i+3] +
370 vector[i+4] * vector[i+4];
374 #if defined(USE_LOOP_UNROLLING) && (USE_LOOP_UNROLLING == 1)
375 for (
LongIndexType i=vector_size_chunked; i < vector_size; ++i)
378 #pragma omp parallel
for \
382 shared(vector, vector_size) \
388 norm2 += vector[i] * vector[i];
392 DataType norm = std::sqrt(
static_cast<DataType
>(norm2));
413template <
typename DataType>
418 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
422 vector, vector_size);
425 DataType scale = 1.0 / norm;
427 cblas_api::xscal(vector_size, scale, vector, incx);
438 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
439 #pragma omp parallel for \
441 if ((!omp_in_parallel()) && (vector_size >= LARGE_ARRAY_SIZE)) \
443 shared(vector, vector_size, norm)
471template <
typename DataType>
477 #if defined(USE_ANY_CBLAS) && (USE_ANY_CBLAS == 1)
481 vector, vector_size);
484 DataType scale = 1.0 / norm;
497 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
498 #pragma omp parallel for \
500 if ((!omp_in_parallel()) && (vector_size >= LARGE_ARRAY_SIZE)) \
502 shared(vector, output_vector, vector_size, norm)
506 output_vector[i] = vector[i] / norm;
A static class for vector operations, similar to level-1 operations of the BLAS library....
static void copy_scaled_vector(const DataType *RESTRICT input_vector, const LongIndexType vector_size, const DataType scale, DataType *RESTRICT output_vector)
Scales a vector and stores to a new vector.
static DataType inner_product(const DataType *RESTRICT vector1, const DataType *RESTRICT vector2, const LongIndexType vector_size)
Computes Euclidean inner product of two vectors.
static DataType normalize_vector_and_copy(const DataType *RESTRICT vector, const LongIndexType vector_size, DataType *RESTRICT output_vector)
Normalizes a vector based on Euclidean 2-norm. The result is written into another vector.
static void copy_vector(const DataType *RESTRICT input_vector, const LongIndexType vector_size, DataType *RESTRICT output_vector)
Copies a vector to a new vector. Result is written in-place.
static DataType normalize_vector_in_place(DataType *RESTRICT vector, const LongIndexType vector_size)
Normalizes a vector based on Euclidean 2-norm. The result is written in-place.
static void subtract_scaled_vector(const DataType *RESTRICT input_vector, const LongIndexType vector_size, const DataType scale, DataType *RESTRICT output_vector)
Subtracts the scaled input vector from the output vector.
static DataType euclidean_norm(const DataType *RESTRICT vector, const LongIndexType vector_size)
Computes the Euclidean norm of a 1D array.
bool is_equal(DataType x, DataType y)
Check if two floating point numbers are equal within a tolerance.