imate
C++/CUDA Reference
Loading...
Searching...
No Matches
random_array_generator.cpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11
12// =======
13// Headers
14// =======
15
17#include "../_definitions/definitions.h" // USE_OPENMP
18#if defined(USE_OPENMP) && (USE_OPENMP == 1)
19 #include <omp.h> // omp_set_num_threads
20#endif
21#include <stdint.h> // uint64_t
22#include "../_c_trace_estimator/c_orthogonalization.h" // cOrthogonalization
23#include "../_c_basic_algebra/c_vector_operations.h" // cVectorOperations
24
25
26// =====================
27// generate random array
28// =====================
29
64
65template <typename DataType>
67 RandomNumberGenerator& random_number_generator,
68 DataType* array,
69 const LongIndexType array_size,
70 const IndexType num_threads)
71{
72 // Set the number of threads only if num_threads is non-zero. If
73 // num_threads is zero, it indicates to not create new threads in this
74 // function, rather, this function was called from another caller function
75 // that was already parallelized and this function should be executed in
76 // one of those threads.
77 if (num_threads > 0)
78 {
79 // num_threads parallel threads will be created in this function.
80 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
81 omp_set_num_threads(num_threads);
82 #endif
83 }
84
85 // Finding the thread id. It depends where we call omp_get_thread_num. If
86 // we call it here (below), it gives the thread id of the parent function.
87 // But if we call it inside a #pragma omp parallel, it gives the newer
88 // thread id that is "created" by the pragma.
89 int thread_id = 0;
90 if (num_threads == 0)
91 {
92 // If num_threads is zero (which means we will not create a new thread
93 // in this function), we get the thread id of the parent function.
94 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
95 thread_id = omp_get_thread_num();
96 #else
97 thread_id = 0;
98 #endif
99 }
100
101 // Number of bits to generate in each call of the random generator. This is
102 // the number of bits in a uint64_t integer.
103 const int bits_per_byte = 8;
104 const int num_bits = sizeof(uint64_t) * bits_per_byte;
105
106 // Number of chunks of divided array each in num_bits length
107 const LongIndexType num_chunks = \
108 static_cast<LongIndexType>(array_size/num_bits);
109
110 // Shared-memory parallelism over individual row vectors. The parallel
111 // section only works if num_threads is non-zero. Otherwise it runs in
112 // serial order.
113 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
114 #pragma omp parallel if (num_threads > 0)
115 #endif
116 {
117 // If num_threads is zero, the following thread_id is the thread id
118 // that the parent (caller) function created outside of this function.
119 // But, if num_thread is non-zero, the following thread id is the
120 // thread id that is created inside this parallel loop.
121 if (num_threads > 0)
122 {
123 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
124 thread_id = omp_get_thread_num();
125 #else
126 thread_id = 0;
127 #endif
128 }
129
130 #if defined(USE_OPENMP) && (USE_OPENMP == 1)
131 #pragma omp for schedule(static)
132 #endif
133 for (LongIndexType i=0; i < num_chunks; ++i)
134 {
135 // Generate 64 bits (one integer)
136 uint64_t bits = random_number_generator.next(thread_id);
137
138 // Fill 64 elements of array with +1 or -1 depending on the bits
139 for (IndexType j=0; j < num_bits; ++j)
140 {
141 // Check if the j-th bit (from right to left) is 1 or 0
142 if (bits & ( uint64_t(1) << j))
143 {
144 // Bit is 1. Write +1.0 in array
145 array[i*num_bits + j] = 1.0;
146 }
147 else
148 {
149 // Bit is 0. Write -1.0 in array
150 array[i*num_bits + j] = -1.0;
151 }
152 }
153 }
154 }
155
156 // The previous for loop (the above) does not fill all elements of array
157 // since it only iterates on the multiples of 64 (num_bits). We fill the
158 // rest of the array. There are less than 64 elements remained to fill. So,
159 // it suffice to generate only 64 bits (one integer) for the rest of the
160 // elements of the array
161 uint64_t bits = random_number_generator.next(thread_id);
162 IndexType j = 0;
163
164 // This loop should have less than 64 iterations.
165 for (LongIndexType i = num_chunks * num_bits; i < array_size; ++i)
166 {
167 // Check if the j-th bit (from right to left) is 1 or 0
168 if (bits & ( uint64_t(1) << j))
169 {
170 // Bit is 1. Write +1.0 in array
171 array[i] = 1.0;
172 }
173 else
174 {
175 // Bit is 0. Write -1.0 in array
176 array[i] = -1.0;
177 }
178
179 // Increment bit counter
180 ++j;
181 }
182}
183
184
185// ===============================
186// Explicit template instantiation
187// ===============================
188
189template class RandomArrayGenerator<float>;
190template class RandomArrayGenerator<double>;
A static class to generate random set of vectors. This class acts as a templated namespace,...
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Generates 64-bit integers on multiple parallel threads.
uint64_t next(const int thread_id)
Generates the next random number in the sequence, depending on the thread id.
void omp_set_num_threads(int num_threads)
int omp_get_thread_num()
int LongIndexType
Definition types.h:60
int IndexType
Definition types.h:65