imate
C++/CUDA Reference
definitions.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3  * SPDX-License-Identifier: BSD-3-Clause
4  * SPDX-FileType: SOURCE
5  *
6  * This program is free software: you can redistribute it and/or modify it
7  * under the terms of the license found in the LICENSE.txt file in the root
8  * directory of this source tree.
9  */
10 
11 
12 #ifndef _DEFINITIONS_DEFINITIONS_H_
13 #define _DEFINITIONS_DEFINITIONS_H_
14 
15 
16 // ===========
17 // Definitions
18 // ===========
19 
20 // To suppress warning: "__STDC_VERSION__" is not defined, evaluates to 0
21 // #ifndef __STDC_VERSION__
22 // #define __STDC_VERSION__ 0
23 // #endif
24 
25 // If set to 0, the LongIndexType is declared as 32-bit integer. Whereas if set
26 // to 1, the LongIndexType is declared as 64-bit integer. The long integer will
27 // slow down the performance on reading array of integers. Note that the width
28 // of "long int" is platform dependent: C++ only guarantees at least 32 bits, so
29 // on some platforms (e.g., 64-bit Windows) it is the same size as "int". To
30 // guarantee a 64-bit type, use "long long" rather than "long int". Currently,
31 // the long type in "./types.h" is defined as "long int". Hence, on such
32 // platforms, setting LONG_INT to 1 has no effect unless "long long" is used.
33 //
34 // Note: The malloc and cudaMalloc can only allocate at maximum, an array of
35 // the limit size of "size_t" (an unsigned type, 32-bit on 32-bit platforms).
36 // So, using "long long int" is not always practical for malloc. Thus, it is
37 // better to set the type of array indices as just "signed int".
38 #ifndef LONG_INT
39  #define LONG_INT 0  // Default: LongIndexType is declared as 32-bit integer
40 #endif  // LONG_INT
41 
42 // If set to 0, the LongIndexType is declared as signed integer, whereas if set
43 // to 1, the LongIndexType is declared as unsigned integer. The unsigned type
44 // will double the limit of the largest integer index, while keeping the same
45 // speed for index operations. Note that the indices and index pointers of
46 // scipy sparse arrays are defined by "signed int". Hence, by setting
47 // UNSIGNED_LONG_INT to 1, there is a one-time overhead of converting the numpy
48 // int arrays (the two arrays scipy.sparse.csr_matrix.indices and
49 // scipy.sparse.csr_matrix.indptr) from "int" to "unsigned int". This overhead
50 // is only one-time and should be around half a second for moderate to large
51 // arrays. But, on the positive side, the unsigned int can handle arrays of
52 // up to twice the index size.
53 //
54 // Note: The malloc and cudaMalloc can allocate at most as many bytes as the
55 // maximum of "size_t" (32-bit on 32-bit platforms). So, using "unsigned int"
56 // for index is not practical there, since the array size in bytes is the
57 // number of elements times sizeof(DataType). That is, if DataType is double,
58 // for instance, the array size in bytes could potentially be 8 times the
59 // maximum of a 32-bit "size_t", which is not possible for malloc. Thus, it is
60 // better to set the type of array indices as just "signed int".
61 #ifndef UNSIGNED_LONG_INT
62  #define UNSIGNED_LONG_INT 0  // Default: LongIndexType is a signed integer
63 #endif  // UNSIGNED_LONG_INT
64 
65 // If USE_CBLAS is set to 1, the OpenBLAS library is used for dense vector and
66 // matrix operations. Note that OpenBLAS does not declare operations on "long
67 // double" type, rather, only "float" and "double" types are supported. To use
68 // "long double" type, set USE_CBLAS to 0. OpenBLAS is nearly twice as fast,
69 // but it loses accuracy on large arrays of float type. This inaccuracy could
70 // matter a lot when computing dot product and norm of very large vectors.
71 #ifndef USE_CBLAS
72  #define USE_CBLAS 0  // Default: OpenBLAS is not used
73 #endif  // USE_CBLAS
74 
75 
76 #endif // _DEFINITIONS_DEFINITIONS_H_