doxygen/html/__cu__sub_8h_source.html

/*

 *  SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <sameli@berkeley.edu>

 *  SPDX-License-Identifier: BSD-3-Clause

 *  SPDX-FileType: SOURCE

 *

 *  This program is free software: you can redistribute it and/or modify it

 *  under the terms of the license found in the LICENSE.txt file in the root

 *  directory of this source tree.

 */


#ifndef _CU_ARITHMETICS_CU_SUB_H_

#define _CU_ARITHMETICS_CU_SUB_H_


// =======

// Headers

// =======


#include "../_cu_definitions/cu_types.h" // __nv_fp8_e5m2, __nv_fp8_e4m3,

                                         // __half, __nv_bfloat16, __hsub

#include <cassert>  // assert


// =============

// cu arithmetic

// =============


namespace cu_arithmetics

{

    // ===

    // sub

    // ===


    /// \brief      Primary template for subtracting two numbers of the same
    ///             floating point type.
    ///
    /// \details    Only the declaration is given here. The bodies are supplied
    ///             by the explicit specializations in this header, each of
    ///             which is compiled only when its corresponding USE_CUDA_*
    ///             macro is defined and equal to 1. No generic definition is
    ///             visible in this header, so a \c DataType without an
    ///             enabled specialization has no implementation here.
    ///
    /// \tparam     DataType  Type of both operands and of the result.
    /// \param[in]  x         Minuend.
    /// \param[in]  y         Subtrahend.
    /// \return     The difference of \c x and \c y, as \c DataType.

    template <typename DataType>

    inline __host__ __device__ DataType sub(

            const DataType x,

            const DataType y);


    // ===
    // sub (__nv_fp8_e5m2)
    // ===

    #if defined(USE_CUDA_FP8_E5M2) && (USE_CUDA_FP8_E5M2 == 1)
    /// \brief      Placeholder for the subtraction of two \c __nv_fp8_e5m2
    ///             numbers.
    ///
    /// \details    Subtraction is not implemented for this type. The call
    ///             trips \c assert(false). In builds where assertions are
    ///             compiled out, a value constructed from \c NAN is returned
    ///             instead. Neither operand is read.
    ///
    /// \param[in]  x  Minuend (unused).
    /// \param[in]  y  Subtrahend (unused).
    /// \return     A \c __nv_fp8_e5m2 constructed from \c NAN.
    template<>
    inline __host__ __device__ __nv_fp8_e5m2 sub<__nv_fp8_e5m2>(
            const __nv_fp8_e5m2 x,
            const __nv_fp8_e5m2 y)
    {
        // The operands are intentionally ignored by this stub.
        (void) x;
        (void) y;

        // Fail loudly wherever assertions are enabled.
        assert(false);

        // Only reached when the assertion above is compiled out.
        const __nv_fp8_e5m2 not_a_number(NAN);
        return not_a_number;
    }
    #endif


    // ===
    // sub (__nv_fp8_e4m3)
    // ===

    #if defined(USE_CUDA_FP8_E4M3) && (USE_CUDA_FP8_E4M3 == 1)
    /// \brief      Placeholder for the subtraction of two \c __nv_fp8_e4m3
    ///             numbers.
    ///
    /// \details    Subtraction is not implemented for this type. The call
    ///             trips \c assert(false). In builds where assertions are
    ///             compiled out, a value constructed from \c NAN is returned
    ///             instead. Neither operand is read.
    ///
    /// \param[in]  x  Minuend (unused).
    /// \param[in]  y  Subtrahend (unused).
    /// \return     A \c __nv_fp8_e4m3 constructed from \c NAN.
    template<>
    inline __host__ __device__ __nv_fp8_e4m3 sub<__nv_fp8_e4m3>(
            const __nv_fp8_e4m3 x,
            const __nv_fp8_e4m3 y)
    {
        // The operands are intentionally ignored by this stub.
        (void) x;
        (void) y;

        // Fail loudly wherever assertions are enabled.
        assert(false);

        // Only reached when the assertion above is compiled out.
        const __nv_fp8_e4m3 not_a_number(NAN);
        return not_a_number;
    }
    #endif


    // ===
    // sub (__half)
    // ===

    #if defined(USE_CUDA_FP16) && (USE_CUDA_FP16 == 1)
    /// \brief      Subtracts two \c __half numbers.
    ///
    /// \details    In the device compilation pass the \c __hsub intrinsic is
    ///             used. In the host compilation pass the operands are widened
    ///             to \c float, subtracted, and the result is converted back
    ///             to \c __half. The host path exists because this function is
    ///             declared \c __host__ \c __device__, while older CUDA
    ///             toolkits declare \c __hsub as device-only.
    ///
    /// \note       \c __half2float and \c __float2half are presumably made
    ///             available by the same header that provides \c __hsub
    ///             (reached through cu_types.h) -- confirm when building.
    ///
    /// \param[in]  x  Minuend.
    /// \param[in]  y  Subtrahend.
    /// \return     The difference \c x - \c y as a \c __half.
    template<>
    inline __host__ __device__ __half sub<__half>(
            const __half x,
            const __half y)
    {
        #if defined(__CUDA_ARCH__)
            // Device pass: native half-precision subtraction.
            return __hsub(x, y);
        #else
            // Host pass: do not rely on __hsub being callable from the host.
            // The float intermediate has a 24-bit significand, which is wide
            // enough that rounding it back to half yields the same value as
            // a single correctly rounded half subtraction.
            return __float2half(__half2float(x) - __half2float(y));
        #endif
    }
    #endif


    // ===
    // sub (__nv_bfloat16)
    // ===

    #if defined(USE_CUDA_BF16) && (USE_CUDA_BF16 == 1)
    /// \brief      Subtracts two \c __nv_bfloat16 numbers.
    ///
    /// \details    In the device compilation pass the \c __hsub intrinsic is
    ///             used. In the host compilation pass the operands are widened
    ///             to \c float, subtracted, and the result is converted back
    ///             to \c __nv_bfloat16. The host path exists because this
    ///             function is declared \c __host__ \c __device__, while older
    ///             CUDA toolkits declare \c __hsub as device-only.
    ///
    /// \note       \c __bfloat162float and \c __float2bfloat16 are presumably
    ///             made available by the same header that provides \c __hsub
    ///             (reached through cu_types.h) -- confirm when building.
    ///
    /// \param[in]  x  Minuend.
    /// \param[in]  y  Subtrahend.
    /// \return     The difference \c x - \c y as a \c __nv_bfloat16.
    template<>
    inline __host__ __device__ __nv_bfloat16 sub<__nv_bfloat16>(
            const __nv_bfloat16 x,
            const __nv_bfloat16 y)
    {
        #if defined(__CUDA_ARCH__)
            // Device pass: native bfloat16 subtraction.
            return __hsub(x, y);
        #else
            // Host pass: do not rely on __hsub being callable from the host.
            // The float intermediate has a 24-bit significand, which is wide
            // enough that rounding it back to bfloat16 yields the same value
            // as a single correctly rounded bfloat16 subtraction.
            return __float2bfloat16(
                    __bfloat162float(x) - __bfloat162float(y));
        #endif
    }
    #endif


    // ===
    // sub (float)
    // ===

    #if defined(USE_CUDA_FP32) && (USE_CUDA_FP32 == 1)
    /// \brief      Subtracts two \c float numbers.
    ///
    /// \details    Uses the built-in subtraction operator; the same code is
    ///             compiled for both host and device.
    ///
    /// \param[in]  x  Minuend.
    /// \param[in]  y  Subtrahend.
    /// \return     The difference \c x - \c y.
    template<>
    inline __host__ __device__ float sub<float>(
            const float x,
            const float y)
    {
        const float difference = x - y;
        return difference;
    }
    #endif


    // ===
    // sub (double)
    // ===

    #if defined(USE_CUDA_FP64) && (USE_CUDA_FP64 == 1)
    /// \brief      Subtracts two \c double numbers.
    ///
    /// \details    Uses the built-in subtraction operator; the same code is
    ///             compiled for both host and device.
    ///
    /// \param[in]  x  Minuend.
    /// \param[in]  y  Subtrahend.
    /// \return     The difference \c x - \c y.
    template<>
    inline __host__ __device__ double sub<double>(
            const double x,
            const double y)
    {
        const double difference = x - y;
        return difference;
    }
    #endif


}  // namespace cu_arithmetics


#endif  // _CU_ARITHMETICS_CU_SUB_H_

cu_arithmetics
Cast from float to __half and __nv_bfloat16 types and vice-versa, and float to double and vice-versa.
Definition _cu_abs.h:43

cu_arithmetics::abs
__host__ __device__ DataType abs(const DataType x)
Absolute value of a floating point number.

cu_arithmetics::sub< float >
__host__ __device__ float sub< float >(const float x, const float y)
Subtract two float type numbers in round-to-nearest-even mode.
Definition _cu_sub.h:195

cu_arithmetics::sub
__host__ __device__ DataType sub(const DataType x, const DataType y)
Subtract two floating point numbers in round-to-nearest-even mode.

cu_arithmetics::sub< double >
__host__ __device__ double sub< double >(const double x, const double y)
Subtract two double type float numbers.
Definition _cu_sub.h:221

__nv_fp8_e4m3
Definition cu_types.h:34

__nv_fp8_e5m2
Definition cu_types.h:27