imate
C++/CUDA Reference
random_array_generator.cpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 // =======
13 // Headers
14 // =======
15 
17 #include <omp.h> // omp_set_num_threads
18 #include <stdint.h> // uint64_t
19 #include "../_c_trace_estimator/c_orthogonalization.h" // cOrthogonalization
20 #include "../_c_basic_algebra/c_vector_operations.h" // cVectorOperations
21 
22 
23 // =====================
24 // generate random array
25 // =====================
26 
61 
62 template <typename DataType>
64  RandomNumberGenerator& random_number_generator,
65  DataType* array,
66  const LongIndexType array_size,
67  const IndexType num_threads)
68 {
69  // Set the number of threads only if num_threads is non-zero. If
70  // num_threads is zero, it indicates to not create new threads in this
71  // function, rather, this function was called from another caller function
72  // that was already parallelized and this function should be executed in
73  // one of those threads.
74  if (num_threads > 0)
75  {
76  // num_threads parallel threads will be created in this function.
77  omp_set_num_threads(num_threads);
78  }
79 
80  // Finding the thread id. It depends where we call omp_get_thread_num. If
81  // we call it here (below), it gives the thread id of the parent function.
82  // But if we call it inside a #pragma omp parallel, it gives the newer
83  // thread id that is "created" by the pragma.
84  int thread_id = 0;
85  if (num_threads == 0)
86  {
87  // If num_threads is zero (which means we will not create a new thread
88  // in this function), we get the thread id of the parent function.
89  thread_id = omp_get_thread_num();
90  }
91 
92  // Number of bits to generate in each call of the random generator. This is
93  // the number of bits in a uint64_t integer.
94  const int bits_per_byte = 8;
95  const int num_bits = sizeof(uint64_t) * bits_per_byte;
96 
97  // Number of chunks of divided array each in num_bits length
98  const LongIndexType num_chunks = \
99  static_cast<LongIndexType>(array_size/num_bits);
100 
101  // Shared-memory parallelism over individual row vectors. The parallel
102  // section only works if num_threads is non-zero. Otherwise it runs in
103  // serial order.
104  #pragma omp parallel if (num_threads > 0)
105  {
106  // If num_threads is zero, the following thread_id is the thread id
107  // that the parent (caller) function created outside of this function.
108  // But, if num_thread is non-zero, the following thread id is the
109  // thread id that is created inside this parallel loop.
110  if (num_threads > 0)
111  {
112  thread_id = omp_get_thread_num();
113  }
114 
115  #pragma omp for schedule(static)
116  for (LongIndexType i=0; i < num_chunks; ++i)
117  {
118  // Generate 64 bits (one integer)
119  uint64_t bits = random_number_generator.next(thread_id);
120 
121  // Fill 64 elements of array with +1 or -1 depending on the bits
122  for (IndexType j=0; j < num_bits; ++j)
123  {
124  // Check if the j-th bit (from right to left) is 1 or 0
125  if (bits & ( uint64_t(1) << j))
126  {
127  // Bit is 1. Write +1.0 in array
128  array[i*num_bits + j] = 1.0;
129  }
130  else
131  {
132  // Bit is 0. Write -1.0 in array
133  array[i*num_bits + j] = -1.0;
134  }
135  }
136  }
137  }
138 
139  // The previous for loop (the above) does not fill all elements of array
140  // since it only iterates on the multiples of 64 (num_bits). We fill the
141  // rest of the array. There are less than 64 elements remained to fill. So,
142  // it suffice to generate only 64 bits (one integer) for the rest of the
143  // elements of the array
144  uint64_t bits = random_number_generator.next(thread_id);
145  IndexType j = 0;
146 
147  // This loop should have less than 64 iterations.
148  for (LongIndexType i = num_chunks * num_bits; i < array_size; ++i)
149  {
150  // Check if the j-th bit (from right to left) is 1 or 0
151  if (bits & ( uint64_t(1) << j))
152  {
153  // Bit is 1. Write +1.0 in array
154  array[i] = 1.0;
155  }
156  else
157  {
158  // Bit is 0. Write -1.0 in array
159  array[i] = -1.0;
160  }
161 
162  // Increment bit counter
163  ++j;
164  }
165 }
166 
167 
168 // ===============================
169 // Explicit template instantiation
170 // ===============================
171 
172 template class RandomArrayGenerator<float>;
173 template class RandomArrayGenerator<double>;
174 template class RandomArrayGenerator<long double>;
A static class to generate random set of vectors. This class acts as a templated namespace,...
static void generate_random_array(RandomNumberGenerator &random_number_generator, DataType *array, const LongIndexType array_size, const IndexType num_threads)
Generates a pseudo-random array with Rademacher distribution where elements are either +1 or -1.
Generates 64-bit integers on multiple parallel threads.
uint64_t next(const int thread_id)
Generates the next random number in the sequence, depending on the thread id.
int LongIndexType
Definition: types.h:60
int IndexType
Definition: types.h:65