imate
C++/CUDA Reference
Loading...
Searching...
No Matches
_cu_cast.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>
3 * SPDX-License-Identifier: BSD-3-Clause
4 * SPDX-FileType: SOURCE
5 *
6 * This program is free software: you can redistribute it and/or modify it
7 * under the terms of the license found in the LICENSE.txt file in the root
8 * directory of this source tree.
9 */
10
11#ifndef _CU_ARITHMETICS_CU_CAST_H_
12#define _CU_ARITHMETICS_CU_CAST_H_
13
14
15// =======
16// Headers
17// =======
18
19#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,
20 // __half, __half2float, __float2half,
21 // __int2half_rn, __uint2half,
22 // __ll2half, __ull2half
23 // __nv_bfloat16, __bfloat162float,
24 // __float2bfloat16,
25 // __int2bfloat16_rn, __uint2bfloat16,
26 // __ll2bfloat16, __ull2bfloat16
27
28
29// ==============
30// cu arithmetics
31// ==============
32
46
47namespace cu_arithmetics
48{
49 // ====
50 // cast
51 // ====
52
62
63 template <typename InputDataType, typename OutputDataType>
65
66
67 // ====
68 // cast (__nv_fp8_e5m2 to float)
69 // ====
70
80
81 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
82 template<>
84 const __nv_fp8_e5m2 x)
85 {
86 return float(x);
87 }
88 #endif
89
90
91 // ====
92 // cast (float to __nv_fp8_e5m2)
93 // ====
94
104
105 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
106 template<>
108 const float x)
109 {
110 return __nv_fp8_e5m2(x);
111 }
112 #endif
113
114
115 // ====
116 // cast (__nv_fp8_e5m2 to double)
117 // ====
118
128
129 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
130 template<>
132 const __nv_fp8_e5m2 x)
133 {
134 return double(x);
135 }
136 #endif
137
138
139 // ====
140 // cast (double to __nv_fp8_e5m2)
141 // ====
142
152
153 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
154 template<>
156 const double x)
157 {
158 return __nv_fp8_e5m2(x);
159 }
160 #endif
161
162
163 // ====
164 // cast (__nv_fp8_e4m3 to float)
165 // ====
166
176
177 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
178 template<>
180 const __nv_fp8_e4m3 x)
181 {
182 return float(x);
183 }
184 #endif
185
186
187 // ====
188 // cast (float to __nv_fp8_e4m3)
189 // ====
190
200
201 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
202 template<>
204 const float x)
205 {
206 return __nv_fp8_e4m3(x);
207 }
208 #endif
209
210
211 // ====
212 // cast (__nv_fp8_e4m3 to double)
213 // ====
214
224
225 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
226 template<>
228 const __nv_fp8_e4m3 x)
229 {
230 return double(x);
231 }
232 #endif
233
234
235 // ====
236 // cast (double to __nv_fp8_e4m3)
237 // ====
238
248
249 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
250 template<>
252 const double x)
253 {
254 return __nv_fp8_e4m3(x);
255 }
256 #endif
257
258
259 // ====
260 // cast (__half to float)
261 // ====
262
272
273 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
274 template<>
276 {
277 return __half2float(x);
278 }
279 #endif
280
281
282 // ====
283 // cast (float to __half)
284 // ====
285
295
296 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
297 template<>
299 {
300 return __float2half(x);
301 }
302 #endif
303
304
305 // ====
306 // cast (__half to double)
307 // ====
308
318
319 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
320 template<>
321 inline __host__ __device__ double cast<__half, double>(const __half x)
322 {
323 return static_cast<double>(__half2float(x));
324 }
325 #endif
326
327
328 // ====
329 // cast (double to __half)
330 // ====
331
341
342 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
343 template<>
344 inline __host__ __device__ __half cast<double, __half>(const double x)
345 {
346 return __float2half(static_cast<float>(x));
347 }
348 #endif
349
350
351 // ====
352 // cast (__nv_bfloat16 to float)
353 // ====
354
364
365 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
366 template<>
368 const __nv_bfloat16 x)
369 {
370 return __bfloat162float(x);
371 }
372 #endif
373
374
375 // ====
376 // cast (float to __nv_bfloat16)
377 // ====
378
388
389 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
390 template<>
392 const float x)
393 {
394 return __float2bfloat16(x);
395 }
396 #endif
397
398
399 // ====
400 // cast (__nv_bfloat16 to double)
401 // ====
402
412
413 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
414 template<>
416 const __nv_bfloat16 x)
417 {
418 return static_cast<double>(__bfloat162float(x));
419 }
420 #endif
421
422
423 // ====
424 // cast (double to __nv_bfloat16)
425 // ====
426
436
437 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
438 template<>
440 const double x)
441 {
442 return __float2bfloat16(static_cast<float>(x));
443 }
444 #endif
445
446
447 // ====
448 // cast (float to float)
449 // ====
450
460
461 template<>
463 const float x)
464 {
465 return x;
466 }
467
468
469 // ====
470 // cast (float to double)
471 // ====
472
482
483 template<>
485 const float x)
486 {
487 return static_cast<double>(x);
488 }
489
490
491 // ====
492 // cast (double to double)
493 // ====
494
504
505 template<>
507 const double x)
508 {
509 return x;
510 }
511
512
513 // ====
514 // cast (double to float)
515 // ====
516
526
527 template<>
529 const double x)
530 {
531 return static_cast<float>(x);
532 }
533
534
535 // ====
536 // cast (int to __nv_fp8_e5m2)
537 // ====
538
549
550 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
551 template<>
553 const int x)
554 {
555 return __nv_fp8_e5m2(x);
556 }
557 #endif
558
559
560 // ====
561 // cast (int to __nv_fp8_e4m3)
562 // ====
563
574
575 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
576 template<>
578 const int x)
579 {
580 return __nv_fp8_e4m3(x);
581 }
582 #endif
583
584
585 // ====
586 // cast (int to __half)
587 // ====
588
599
600 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
601 template<>
603 {
604 return __int2half_rn(x);
605 }
606 #endif
607
608
609 // ====
610 // cast (int to __nv_bfloat16)
611 // ====
612
623
624 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
625 template<>
627 const int x)
628 {
629 return __int2bfloat16_rn(x);
630 }
631 #endif
632
633
634 // ====
635 // cast (int to float)
636 // ====
637
648
649 template<>
651 const int x)
652 {
653 return static_cast<float>(x);
654 }
655
656
657 // ====
658 // cast (float to int)
659 // ====
660
671
672 template<>
674 const float x)
675 {
676 return static_cast<int>(x);
677 }
678
679
680 // ====
681 // cast (int to double)
682 // ====
683
694
695 template<>
697 const int x)
698 {
699 return static_cast<double>(x);
700 }
701
702
703 // ====
704 // cast (double to int)
705 // ====
706
717
718 template<>
720 const double x)
721 {
722 return static_cast<int>(x);
723 }
724
725
726 // ====
727 // cast (unsigned int to __nv_fp8_e5m2)
728 // ====
729
740
741 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
742 template<>
744 const unsigned int x)
745 {
746 return __nv_fp8_e5m2(x);
747 }
748 #endif
749
750
751 // ====
752 // cast (unsigned int to __nv_fp8_e4m3)
753 // ====
754
765
766 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
767 template<>
769 const unsigned int x)
770 {
771 return __nv_fp8_e4m3(x);
772 }
773 #endif
774
775
776 // ====
777 // cast (unsigned int to __half)
778 // ====
779
790
791 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
792 template<>
794 const unsigned int x)
795 {
796 return __uint2half_rn(x);
797 }
798 #endif
799
800
801 // ====
802 // cast (unsigned int to __nv_bfloat16)
803 // ====
804
815
816 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
817 template<>
819 const unsigned int x)
820 {
821 return __uint2bfloat16_rn(x);
822 }
823 #endif
824
825
826 // ====
827 // cast (unsigned int to float)
828 // ====
829
840
841 template<>
843 const unsigned int x)
844 {
845 return static_cast<float>(x);
846 }
847
848
849 // ====
850 // cast (float to unsigned int)
851 // ====
852
863
864 template<>
866 const float x)
867 {
868 return static_cast<unsigned int>(x);
869 }
870
871
872 // ====
873 // cast (unsigned int to double)
874 // ====
875
886
887 template<>
889 const unsigned int x)
890 {
891 return static_cast<double>(x);
892 }
893
894
895 // ====
896 // cast (double to unsigned int)
897 // ====
898
909
910 template<>
912 const double x)
913 {
914 return static_cast<unsigned int>(x);
915 }
916
917
918 // ====
919 // cast (long long int to __nv_fp8_e5m2)
920 // ====
921
932
933 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
934 template<>
936 long long int, __nv_fp8_e5m2>(
937 const long long int x)
938 {
939 return __nv_fp8_e5m2(x);
940 }
941 #endif
942
943
944 // ====
945 // cast (long long int to __nv_fp8_e4m3)
946 // ====
947
958
959 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
960 template<>
962 long long int, __nv_fp8_e4m3>(
963 const long long int x)
964 {
965 return __nv_fp8_e4m3(x);
966 }
967 #endif
968
969
970 // ====
971 // cast (long long int to __half)
972 // ====
973
984
985 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
986 template<>
988 const long long int x)
989 {
990 return __ll2half_rn(x);
991 }
992 #endif
993
994
995 // ====
996 // cast (long long int to __nv_bfloat16)
997 // ====
998
1009
1010 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1011 template<>
1013 long long int, __nv_bfloat16>(
1014 const long long int x)
1015 {
1016 return __ll2bfloat16_rn(x);
1017 }
1018 #endif
1019
1020
1021 // ====
1022 // cast (long long int to float)
1023 // ====
1024
1035
1036 template<>
1038 const long long int x)
1039 {
1040 return static_cast<float>(x);
1041 }
1042
1043
1044 // ====
1045 // cast (float to long long int)
1046 // ====
1047
1058
1059 template<>
1061 const float x)
1062 {
1063 return static_cast<long long int>(x);
1064 }
1065
1066
1067 // ====
1068 // cast (long long int to double)
1069 // ====
1070
1081
1082 template<>
1084 const long long int x)
1085 {
1086 return static_cast<double>(x);
1087 }
1088
1089
1090 // ====
1091 // cast (double to long long int)
1092 // ====
1093
1104
1105 template<>
1107 const double x)
1108 {
1109 return static_cast<long long int>(x);
1110 }
1111
1112
1113 // ====
1114 // cast (unsigned long long int to __nv_fp8_e5m2)
1115 // ====
1116
1127
1128 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
1129 template<>
1131 unsigned long long int, __nv_fp8_e5m2>(
1132 const unsigned long long int x)
1133 {
1134 return __nv_fp8_e5m2(x);
1135 }
1136 #endif
1137
1138
1139 // ====
1140 // cast (unsigned long long int to __nv_fp8_e4m3)
1141 // ====
1142
1153
1154 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
1155 template<>
1157 unsigned long long int, __nv_fp8_e4m3>(
1158 const unsigned long long int x)
1159 {
1160 return __nv_fp8_e4m3(x);
1161 }
1162 #endif
1163
1164
1165 // ====
1166 // cast (unsigned long long int to __half)
1167 // ====
1168
1179
1180 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
1181 template<>
1183 const unsigned long long int x)
1184 {
1185 return __ull2half_rn(x);
1186 }
1187 #endif
1188
1189
1190 // ====
1191 // cast (unsigned long long int to __nv_bfloat16)
1192 // ====
1193
1204
1205 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
1206 template<>
1208 unsigned long long int, __nv_bfloat16>(
1209 const unsigned long long int x)
1210 {
1211 return __ull2bfloat16_rn(x);
1212 }
1213 #endif
1214
1215
1216 // ====
1217 // cast (unsigned long long int to float)
1218 // ====
1219
1230
1231 template<>
1233 const unsigned long long int x)
1234 {
1235 return static_cast<float>(x);
1236 }
1237
1238
1239 // ====
1240 // cast (float to unsigned long long int)
1241 // ====
1242
1253
1254 template<>
1255 inline __host__ __device__ unsigned long long int cast<
1256 float, unsigned long long int>(
1257 const float x)
1258 {
1259 return static_cast<unsigned long long int>(x);
1260 }
1261
1262
1263 // ====
1264 // cast (unsigned long long int to double)
1265 // ====
1266
1277
1278 template<>
1280 const unsigned long long int x)
1281 {
1282 return static_cast<double>(x);
1283 }
1284
1285
1286 // ====
1287 // cast (double to unsigned long long int)
1288 // ====
1289
1300
1301 template<>
1302 inline __host__ __device__ unsigned long long int cast<
1303 double, unsigned long long int>(
1304 const double x)
1305 {
1306 return static_cast<unsigned long long int>(x);
1307 }
1308
1309} // namespace cu_arithmetics
1310
1311#endif // _CU_ARITHMETICS_CU_CAST_H_
Cast from float to __half and __nv_bfloat16 types and vice-versa, and float to double and vice-versa.
Definition _cu_abs.h:43
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.
__host__ __device__ double cast< float, double >(const float x)
Cast float type to double type.
Definition _cu_cast.h:484
__host__ __device__ long long int cast< double, long long int >(const double x)
Cast double type to long long int type in round-to-nearest-even mode.
Definition _cu_cast.h:1106
__host__ __device__ int cast< float, int >(const float x)
Cast float type to int type in round-to-nearest-even mode.
Definition _cu_cast.h:673
__host__ __device__ float cast< int, float >(const int x)
Cast int type to __nv_fp8_e5m2 type in round-to-nearest-even mode.
Definition _cu_cast.h:650
__host__ __device__ double cast< unsigned int, double >(const unsigned int x)
Cast unsigned int type to double type in round-to-nearest-even mode.
Definition _cu_cast.h:888
__host__ __device__ double cast< unsigned long long int, double >(const unsigned long long int x)
Cast unsigned long long int type to double type in round-to-nearest-even mode.
Definition _cu_cast.h:1279
__host__ __device__ unsigned int cast< double, unsigned int >(const double x)
Cast double type to unsigned int type in round-to-nearest-even mode.
Definition _cu_cast.h:911
__host__ __device__ double cast< double, double >(const double x)
Cast double type to double type (no action needed)
Definition _cu_cast.h:506
__host__ __device__ float cast< long long int, float >(const long long int x)
Cast long long int type to __nv_fp8_e5m2 type in round-to-nearest-even mode.
Definition _cu_cast.h:1037
__host__ __device__ double cast< int, double >(const int x)
Cast int type to double type in round-to-nearest-even mode.
Definition _cu_cast.h:696
__host__ __device__ OutputDataType cast(const InputDataType x)
Cast a floating point type to another floating point type.
__host__ __device__ float cast< float, float >(const float x)
Cast __nv_fp8_e5m2 type to float type.
Definition _cu_cast.h:462
__host__ __device__ unsigned int cast< float, unsigned int >(const float x)
Cast float type to unsigned int type in round-to-nearest-even mode.
Definition _cu_cast.h:865
__host__ __device__ long long int cast< float, long long int >(const float x)
Cast float type to long long int type in round-to-nearest-even mode.
Definition _cu_cast.h:1060
__host__ __device__ float cast< unsigned long long int, float >(const unsigned long long int x)
Cast unsigned long long int type to __nv_fp8_e5m2 type in round-to-nearest-even mode.
Definition _cu_cast.h:1232
__host__ __device__ float cast< unsigned int, float >(const unsigned int x)
Cast unsigned int type to __nv_fp8_e5m2 type in round-to-nearest-even mode.
Definition _cu_cast.h:842
__host__ __device__ int cast< double, int >(const double x)
Cast double type to int type in round-to-nearest-even mode.
Definition _cu_cast.h:719
__host__ __device__ double cast< long long int, double >(const long long int x)
Cast long long int type to double type in round-to-nearest-even mode.
Definition _cu_cast.h:1083
__host__ __device__ float cast< double, float >(const double x)
Cast double type to float type.
Definition _cu_cast.h:528