11#ifndef _CU_ARITHMETICS_CU_MUL_H_
12#define _CU_ARITHMETICS_CU_MUL_H_
18#include "../_cu_definitions/cu_types.h"
58 template <
typename DataType>
80 #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
110 #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
140 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
167 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
194 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
221 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
250 template <
typename DataType>
275 #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
305 #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
335 #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
365 #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
Cast from float to __half and __nv_bfloat16 types and vice-versa, and float to double and vice-versa.
__host__ __device__ DataType mul(const DataType x, const DataType y)
Multiply two floating point numbers in round-to-nearest-even mode.
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.
__host__ __device__ double mul< double >(const double x, const double y)
Multiply two double type numbers in round-to-nearest-even mode.
__host__ __device__ float mul< float >(const float x, const float y)
Multiply two float type numbers in round-to-nearest-even mode.