| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990 |
- //
- // SPDX-License-Identifier: BSD-3-Clause
- // Copyright Contributors to the OpenEXR Project.
- //
- //
- // Primary original authors:
- // Florian Kainz <kainz@ilm.com>
- // Rod Bogart <rgb@ilm.com>
- //
- #ifndef IMATH_HALF_H_
- #define IMATH_HALF_H_
- #include "ImathExport.h"
- #include "ImathNamespace.h"
- #include "ImathPlatform.h"
- /// @file half.h
- /// The half type is a 16-bit floating number, compatible with the
- /// IEEE 754-2008 binary16 type.
- ///
- /// **Representation of a 32-bit float:**
- ///
- /// We assume that a float, f, is an IEEE 754 single-precision
- /// floating point number, whose bits are arranged as follows:
- ///
- ///     31 (msb)
- ///     |
- ///     | 30     23
- ///     | |      |
- ///     | |      | 22                    0 (lsb)
- ///     | |      | |                     |
- ///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
- ///
- ///     s e        m
- ///
- /// s is the sign-bit, e is the exponent and m is the significand.
- ///
- /// If e is between 1 and 254, f is a normalized number:
- ///
- ///             s    e-127
- ///     f = (-1)  * 2      * 1.m
- ///
- /// If e is 0, and m is not zero, f is a denormalized number:
- ///
- ///             s    -126
- ///     f = (-1)  * 2      * 0.m
- ///
- /// If e and m are both zero, f is zero:
- ///
- /// f = 0.0
- ///
- /// If e is 255, f is an "infinity" or "not a number" (NAN),
- /// depending on whether m is zero or not.
- ///
- /// Examples:
- ///
- /// 0 00000000 00000000000000000000000 = 0.0
- /// 0 01111110 00000000000000000000000 = 0.5
- /// 0 01111111 00000000000000000000000 = 1.0
- /// 0 10000000 00000000000000000000000 = 2.0
- /// 0 10000000 10000000000000000000000 = 3.0
- /// 1 10000101 11110000010000000000000 = -124.0625
- /// 0 11111111 00000000000000000000000 = +infinity
- /// 1 11111111 00000000000000000000000 = -infinity
- /// 0 11111111 10000000000000000000000 = NAN
- /// 1 11111111 11111111111111111111111 = NAN
- ///
- /// **Representation of a 16-bit half:**
- ///
- /// Here is the bit-layout for a half number, h:
- ///
- ///     15 (msb)
- ///     |
- ///     | 14  10
- ///     | |   |
- ///     | |   | 9        0 (lsb)
- ///     | |   | |        |
- ///     X XXXXX XXXXXXXXXX
- ///
- ///     s e     m
- ///
- /// s is the sign-bit, e is the exponent and m is the significand.
- ///
- /// If e is between 1 and 30, h is a normalized number:
- ///
- ///             s    e-15
- ///     h = (-1)  * 2     * 1.m
- ///
- /// If e is 0, and m is not zero, h is a denormalized number:
- ///
- ///             s    -14
- ///     h = (-1)  * 2     * 0.m
- ///
- /// If e and m are both zero, h is zero:
- ///
- /// h = 0.0
- ///
- /// If e is 31, h is an "infinity" or "not a number" (NAN),
- /// depending on whether m is zero or not.
- ///
- /// Examples:
- ///
- /// 0 00000 0000000000 = 0.0
- /// 0 01110 0000000000 = 0.5
- /// 0 01111 0000000000 = 1.0
- /// 0 10000 0000000000 = 2.0
- /// 0 10000 1000000000 = 3.0
- /// 1 10101 1111000001 = -124.0625
- /// 0 11111 0000000000 = +infinity
- /// 1 11111 0000000000 = -infinity
- /// 0 11111 1000000000 = NAN
- /// 1 11111 1111111111 = NAN
- ///
- /// **Conversion via Lookup Table:**
- ///
- /// Converting from half to float is performed by default using a
- /// lookup table. There are only 65,536 different half numbers; each
- /// of these numbers has been converted and stored in a table pointed
- /// to by the ``imath_half_to_float_table`` pointer.
- ///
- /// Prior to Imath v3.1, conversion from float to half was
- /// accomplished with the help of an exponent lookup table, but this is
- /// now replaced with explicit bit shifting.
- ///
- /// **Conversion via Hardware:**
- ///
- /// For Imath v3.1, the conversion routines have been extended to use
- /// F16C SSE instructions whenever present and enabled by compiler
- /// flags.
- ///
- /// **Conversion via Bit-Shifting**
- ///
- /// If F16C SSE instructions are not available, conversion can be
- /// accomplished by a bit-shifting algorithm. For half-to-float
- /// conversion, this is generally slower than the lookup table, but it
- /// may be preferable when memory limits preclude storing of the
- /// 65,536-entry lookup table.
- ///
- /// The lookup table symbol is included in the compilation even if
- /// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code
- /// using the exported ``half.h`` may choose to enable the use of the table.
- ///
- /// An implementation can eliminate the table from compilation by
- /// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol.
- /// Simply add:
- ///
- /// #define IMATH_HALF_NO_LOOKUP_TABLE
- ///
- /// before including ``half.h``, or define the symbol on the compile
- /// command line.
- ///
- /// Furthermore, an implementation wishing to receive ``FE_OVERFLOW``
- /// and ``FE_UNDERFLOW`` floating point exceptions when converting
- /// float to half by the bit-shift algorithm can define the
- /// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to
- /// including ``half.h``:
- ///
- /// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS
- ///
- /// **Conversion Performance Comparison:**
- ///
- /// Testing on a Core i9, the timings are approximately:
- ///
- /// half to float
- /// - table: 0.71 ns / call
- /// - no table: 1.06 ns / call
- /// - f16c: 0.45 ns / call
- ///
- /// float-to-half:
- /// - original: 5.2 ns / call
- /// - no exp table + opt: 1.27 ns / call
- /// - f16c: 0.45 ns / call
- ///
- /// **Note:** the timing above depends on the distribution of the
- /// floats in question.
- ///
- #ifdef __CUDA_ARCH__
- // do not include intrinsics headers on Cuda
- #elif defined(_WIN32)
- # include <intrin.h>
- #elif defined(__x86_64__)
- # include <x86intrin.h>
- #elif defined(__F16C__)
- # include <immintrin.h>
- #endif
- #include <stdint.h>
- #include <stdio.h>
- #ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
- # include <fenv.h>
- #endif
- //-------------------------------------------------------------------------
- // Limits
- // (float literals under MSVC on Windows, mostly double literals elsewhere)
- // Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float
- // constants, but at least one other compiler (gcc 2.96) produces incorrect
- // results if they are.
- //-------------------------------------------------------------------------
- #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
- /// Smallest positive denormalized half (2^-24)
- # define HALF_DENORM_MIN 5.96046448e-08f
- /// Smallest positive normalized half (2^-14)
- # define HALF_NRM_MIN 6.10351562e-05f
- /// Smallest positive normalized half (same value as HALF_NRM_MIN)
- # define HALF_MIN 6.10351562e-05f
- /// Largest positive half
- # define HALF_MAX 65504.0f
- /// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
- # define HALF_EPSILON 0.00097656f
- #else
- /// Smallest positive denormalized half (2^-24)
- # define HALF_DENORM_MIN 5.96046448e-08
- /// Smallest positive normalized half (2^-14)
- # define HALF_NRM_MIN 6.10351562e-05
- /// Smallest positive normalized half. NOTE(review): float literal, unlike the double literals beside it - confirm intended
- # define HALF_MIN 6.10351562e-05f
- /// Largest positive half
- # define HALF_MAX 65504.0
- /// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
- # define HALF_EPSILON 0.00097656
- #endif
- /// Number of base-2 digits in the mantissa (10 stored significand bits + hidden leading 1)
- #define HALF_MANT_DIG 11
- /// Number of base 10 digits that can be represented without change:
- ///
- /// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3``
- #define HALF_DIG 3
- /// Number of base-10 digits that are necessary to uniquely represent
- /// all distinct values:
- ///
- /// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5``
- #define HALF_DECIMAL_DIG 5
- /// Base of the exponent
- #define HALF_RADIX 2
- /// Minimum negative integer such that ``HALF_RADIX`` raised to the power
- /// of one less than that integer is a normalized half (2^-14 is HALF_NRM_MIN)
- #define HALF_DENORM_MIN_EXP -13
- /// Maximum positive integer such that ``HALF_RADIX`` raised to the power
- /// of one less than that integer is a normalized half (2^15 = 32768)
- #define HALF_MAX_EXP 16
- /// Minimum negative integer such that 10 raised to that power is a
- /// normalized half (10^-4 is above HALF_NRM_MIN, 10^-5 is below it)
- #define HALF_DENORM_MIN_10_EXP -4
- /// Maximum positive integer such that 10 raised to that power is a
- /// normalized half (10^4 is below HALF_MAX, 10^5 is above it)
- #define HALF_MAX_10_EXP 4
- /// Union overlaying a float and a 32-bit unsigned integer, usable from both C and C++; the conversion routines use it to reach a float's bits
- typedef union imath_half_uif
- {
- uint32_t i; // integer view of the shared storage
- float f; // float view of the shared storage
- } imath_half_uif_t;
- /// Raw 16-bit storage for a half value, shared by C-only programs and C++
- typedef uint16_t imath_half_bits_t;
- #if !defined(__cplusplus) && !defined(__CUDACC__)
- /// if we're in a C-only context (and not compiling with CUDA), alias the half bits type to half
- typedef imath_half_bits_t half;
- #endif
- #if !defined(IMATH_HALF_NO_LOOKUP_TABLE) // table declaration can be compiled out
- # if defined(__cplusplus)
- extern "C" // C linkage so C and C++ users refer to the same symbol
- # else
- extern
- # endif
- IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table; // indexed by half bit pattern in imath_half_to_float
- #endif // !IMATH_HALF_NO_LOOKUP_TABLE
- /// Convert half to float: h is the 16-bit pattern, the result is the float of equal value.
- /// Uses F16C intrinsics when __F16C__ is defined, else the lookup table when
- /// IMATH_HALF_USE_LOOKUP_TABLE is defined (and not disabled), else bit shifting.
- static inline float
- imath_half_to_float (imath_half_bits_t h)
- {
- #if defined(__F16C__)
- // NB: The intel implementation does seem to treat NaN slightly
- // different than the original toFloat table does (i.e. where the
- // 1 bits are, meaning the signalling or not bits). This seems
- // benign, given that the original library didn't really deal with
- // signalling vs non-signalling NaNs
- # ifdef _MSC_VER
- /* msvc does not seem to have cvtsh_ss :( */
- return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h)));
- # else
- return _cvtsh_ss (h);
- # endif
- #elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE)
- return imath_half_to_float_table[h].f; // the half bit pattern is the table index
- #else
- imath_half_uif_t v;
- // extracting the fields with mask constants would be clearer, but it
- // does appear to be faster (1.06 vs 1.08 ns/call) to avoid the
- // constants and just do 4 shifts.
- //
- uint32_t hexpmant = ( (uint32_t)(h) << 17 ) >> 4; // sign shifted out; exponent+mantissa land in bits 27..13
- v.i = ((uint32_t)(h >> 15)) << 31; // half sign bit moved to float bit 31
- // the likely really does help if most of your numbers are "normal" half numbers
- if (IMATH_LIKELY ((hexpmant >= 0x00800000))) // half exponent field is non-zero
- {
- v.i |= hexpmant;
- // either we are a normal number, in which case add in the bias difference
- // otherwise make sure all exponent bits are set
- if (IMATH_LIKELY ((hexpmant < 0x0f800000))) // half exponent field below 31
- v.i += 0x38000000; // (127 - 15) << 23
- else
- v.i |= 0x7f800000; // infinity or NaN
- }
- else if (hexpmant != 0) // zero skips this and keeps only the sign bit
- {
- // exponent is 0 because we're denormal, don't have to extract
- // the mantissa, can just use as is
- //
- // lc below is the number of leading zero bits in hexpmant;
- // other compilers may provide count-leading-zeros primitives,
- // but we need the community to inform us of the variants
- uint32_t lc;
- # if defined(_MSC_VER) && (_M_IX86 || _M_X64)
- lc = __lzcnt (hexpmant);
- # elif defined(__GNUC__) || defined(__clang__)
- lc = (uint32_t) __builtin_clz (hexpmant);
- # else
- lc = 0; // portable fallback: shift left until the top bit is set
- while (0 == ((hexpmant << lc) & 0x80000000))
- ++lc;
- # endif
- lc -= 8; // now the left shift that brings the leading 1 to bit 23
- // so nominally we want to remove that extra bit we shifted
- // up, but we are going to add that bit back in, then subtract
- // from it with the 0x38800000 - (lc << 23)....
- //
- // by combining, this allows us to skip the & operation (and
- // remove a constant)
- //
- // hexpmant &= ~0x00800000;
- v.i |= 0x38800000;
- // lc is now x, where the desired exponent is then
- // -14 - lc
- // + 127 -> new exponent
- v.i |= (hexpmant << lc);
- v.i -= (lc << 23);
- }
- return v.f;
- #endif
- }
- ///
- /// Convert float to half, returning the 16-bit pattern
- ///
- /// Note: This only supports the "round to even" rounding mode, which
- /// was the only mode supported by the original OpenEXR library
- ///
- static inline imath_half_bits_t
- imath_float_to_half (float f)
- {
- #if defined(__F16C__)
- # ifdef _MSC_VER
- // msvc does not seem to have cvtss_sh :(
- return _mm_extract_epi16 (
- _mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)),
- 0);
- # else
- // preserve the fixed rounding mode to nearest
- return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
- # endif
- #else
- imath_half_uif_t v;
- imath_half_bits_t ret;
- uint32_t e, m, ui, r, shift;
- v.f = f;
- ui = (v.i & ~0x80000000); // float bits with the sign cleared
- ret = ((v.i >> 16) & 0x8000); // sign moved to half bit 15
- // exponent large enough to result in a normal number, round and return
- if (ui >= 0x38800000) // |f| >= 2^-14
- {
- // inf or nan
- if (IMATH_UNLIKELY (ui >= 0x7f800000))
- {
- ret |= 0x7c00;
- if (ui == 0x7f800000) // infinity: mantissa is zero
- return ret;
- m = (ui & 0x7fffff) >> 13; // keep the top 10 mantissa bits
- // make sure we have at least one bit after shift to preserve nan-ness
- return ret | (uint16_t)m | (uint16_t)(m == 0);
- }
- // too large, round to infinity
- if (IMATH_UNLIKELY (ui > 0x477fefff)) // |f| >= 65520, halfway past HALF_MAX
- {
- # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
- feraiseexcept (FE_OVERFLOW);
- # endif
- return ret | 0x7c00;
- }
- ui -= 0x38000000; // rebias exponent: (127 - 15) << 23
- ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13); // round to nearest, ties to even, drop 13 bits
- return ret | (uint16_t)ui;
- }
- // zero or flush to 0
- if (ui < 0x33000001) // |f| <= 2^-25, at most half the smallest denormal
- {
- # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
- if (ui == 0) // exact zero: nothing lost, no exception
- return ret;
- feraiseexcept (FE_UNDERFLOW);
- # endif
- return ret;
- }
- // produce a denormalized half
- e = (ui >> 23); // float exponent field
- shift = 0x7e - e; // right shift that places the significand in the half denormal
- m = 0x800000 | (ui & 0x7fffff); // significand with the hidden leading 1 restored
- r = m << (32 - shift); // bits that will be shifted out, left-aligned
- ret |= (m >> shift);
- if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0)) // above half, or exactly half with an odd result
- ++ret;
- return ret;
- #endif
- }
- ////////////////////////////////////////
- #ifdef __cplusplus
- # include <iostream>
- IMATH_INTERNAL_NAMESPACE_HEADER_ENTER
- ///
- ///
- /// class half represents a 16-bit floating point number
- ///
- /// Type half can represent positive and negative numbers whose
- /// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
- /// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
- /// with an absolute error of 6.0e-8. All integers from -2048 to
- /// +2048 can be represented exactly.
- ///
- /// Type half behaves (almost) like the built-in C++ floating point
- /// types. In arithmetic expressions, half, float and double can be
- /// mixed freely. Here are a few examples:
- ///
- ///     half a (3.5);
- ///     float b (a + sqrt (a));
- ///     a += b;
- ///     b += a;
- ///     b = a + 7;
- ///
- /// Conversions from half to float are lossless; all half numbers
- /// are exactly representable as floats.
- ///
- /// Conversions from float to half may not preserve a float's value
- /// exactly. If a float is not representable as a half, then the
- /// float value is rounded to the nearest representable half. If a
- /// float value is exactly in the middle between the two closest
- /// representable half values, then the float value is rounded to
- /// the closest half whose least significant bit is zero.
- ///
- /// A finite float that is too large to be represented as a half
- /// converts to an infinity of the same sign; float infinities and
- /// NANs convert to half infinities and NANs. The bit-shifting
- /// conversion raises ``FE_OVERFLOW`` / ``FE_UNDERFLOW`` only when
- /// ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` is defined before this header
- /// is included.
- ///
- /// The implementation of type half makes the following assumptions
- /// about the implementation of the built-in C++ types:
- ///
- /// * float is an IEEE 754 single-precision number
- /// * sizeof (float) == 4
- /// * sizeof (unsigned int) == sizeof (float)
- /// * alignof (unsigned int) == alignof (float)
- /// * sizeof (uint16_t) == 2
- ///
- class IMATH_EXPORT_TYPE half
- {
-   public:
-     /// A special tag that lets us initialize a half from the raw bits.
-     enum IMATH_EXPORT_ENUM FromBitsTag
-     {
-         FromBits
-     };
-     /// @{
-     /// @name Constructors
-     /// Default construction provides no initialization (hence it is
-     /// not constexpr).
-     half() IMATH_NOEXCEPT = default;
-     /// Construct from float (rounds to the nearest half, ties to even)
-     half (float f) IMATH_NOEXCEPT;
-     /// Construct from bit-vector; bits is stored unchanged
-     constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT;
-     /// Copy constructor
-     constexpr half (const half&) IMATH_NOEXCEPT = default;
-     /// Move constructor
-     constexpr half (half&&) IMATH_NOEXCEPT = default;
-     /// Destructor
-     ~half() IMATH_NOEXCEPT = default;
-     /// @}
-     /// Conversion to float
-     operator float() const IMATH_NOEXCEPT;
-     /// @{
-     /// @name Basic Algebra
-     /// Unary minus (flips the sign bit only)
-     constexpr half operator-() const IMATH_NOEXCEPT;
-     /// Assignment
-     half& operator= (const half& h) IMATH_NOEXCEPT = default;
-     /// Move assignment
-     half& operator= (half&& h) IMATH_NOEXCEPT = default;
-     /// Assignment from float
-     half& operator= (float f) IMATH_NOEXCEPT;
-     /// Addition assignment
-     half& operator+= (half h) IMATH_NOEXCEPT;
-     /// Addition assignment from float
-     half& operator+= (float f) IMATH_NOEXCEPT;
-     /// Subtraction assignment
-     half& operator-= (half h) IMATH_NOEXCEPT;
-     /// Subtraction assignment from float
-     half& operator-= (float f) IMATH_NOEXCEPT;
-     /// Multiplication assignment
-     half& operator*= (half h) IMATH_NOEXCEPT;
-     /// Multiplication assignment from float
-     half& operator*= (float f) IMATH_NOEXCEPT;
-     /// Division assignment
-     half& operator/= (half h) IMATH_NOEXCEPT;
-     /// Division assignment from float
-     half& operator/= (float f) IMATH_NOEXCEPT;
-     /// @}
-     /// Round to n-bit precision (n should be between 0 and 10).
-     /// After rounding, the significand's 10-n least significant
-     /// bits will be zero. For n >= 10 the value is returned unchanged.
-     IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT;
-     /// @{
-     /// @name Classification
-     /// Return true if a normalized number, a denormalized number, or
-     /// zero.
-     constexpr bool isFinite() const IMATH_NOEXCEPT;
-     /// Return true if a normalized number.
-     constexpr bool isNormalized() const IMATH_NOEXCEPT;
-     /// Return true if a denormalized number.
-     constexpr bool isDenormalized() const IMATH_NOEXCEPT;
-     /// Return true if zero (of either sign).
-     constexpr bool isZero() const IMATH_NOEXCEPT;
-     /// Return true if NAN.
-     constexpr bool isNan() const IMATH_NOEXCEPT;
-     /// Return true if a positive or a negative infinity
-     constexpr bool isInfinity() const IMATH_NOEXCEPT;
-     /// Return true if the sign bit is set (negative)
-     constexpr bool isNegative() const IMATH_NOEXCEPT;
-     /// @}
-     /// @{
-     /// @name Special values
-     /// Return +infinity
-     static constexpr half posInf() IMATH_NOEXCEPT;
-     /// Return -infinity
-     static constexpr half negInf() IMATH_NOEXCEPT;
-     /// Returns a NAN with the bit pattern 0111111111111111
-     static constexpr half qNan() IMATH_NOEXCEPT;
-     /// Return a NAN with the bit pattern 0111110111111111
-     static constexpr half sNan() IMATH_NOEXCEPT;
-     /// @}
-     /// @{
-     /// @name Access to the internal representation
-     /// Return the bit pattern
-     constexpr uint16_t bits () const IMATH_NOEXCEPT;
-     /// Set the bit pattern
-     IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT;
-     /// @}
-   public:
-     // The message is what the compiler prints when the check FAILS, so
-     // it states the requirement rather than claiming it holds.
-     static_assert (sizeof (float) == sizeof (uint32_t),
-                    "half requires sizeof (float) == sizeof (uint32_t)");
-     /// C++ spelling of the float/uint32_t union used by the conversions
-     using uif = imath_half_uif;
-   private:
-     /// The 10 significand bits of _h
-     constexpr uint16_t mantissa() const IMATH_NOEXCEPT;
-     /// The 5 exponent bits of _h, shifted down to the low bits
-     constexpr uint16_t exponent() const IMATH_NOEXCEPT;
-     /// The raw 16-bit pattern: sign, 5 exponent bits, 10 significand bits
-     uint16_t _h;
- };
- //---------------------------------------------------
- // Construct a half by converting a float to 16 bits
- //---------------------------------------------------
- inline half::half (float f) IMATH_NOEXCEPT
- {
-     _h = imath_float_to_half (f);
- }
- //----------------------------------------------------
- // Construct a half directly from its 16-bit pattern;
- // the tag argument only selects this overload
- //----------------------------------------------------
- inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT
-     : _h {bits}
- {}
- //----------------------------------------------
- // Implicit conversion of a half to a float
- //----------------------------------------------
- inline half::operator float() const IMATH_NOEXCEPT
- {
-     return imath_half_to_float (bits ());
- }
- //-------------------------------------------------------------
- // Round the significand to n bits; n >= 10 returns the value
- // unchanged
- //-------------------------------------------------------------
- inline IMATH_CONSTEXPR14 half
- half::round (unsigned int n) const IMATH_NOEXCEPT
- {
-     // All 10 significand bits are kept: nothing to round.
-     if (n >= 10) return *this;
-     // Split the pattern into the sign bit and the combined
-     // exponent + significand ("magnitude").
-     const uint16_t signBit   = _h & 0x8000;
-     uint16_t       magnitude = _h & 0x7fff;
-     // Shift out all but one of the discarded bits, add that last
-     // bit back in as the rounding increment, then shift back. A
-     // carry out of the significand bumps the exponent on its own.
-     const unsigned int keepShift = 9 - n;
-     magnitude >>= keepShift;
-     magnitude += magnitude & 1;
-     magnitude <<= keepShift;
-     if (magnitude >= 0x7c00)
-     {
-         // Rounding reached the all-ones exponent: fall back to
-         // truncating the original pattern instead.
-         const unsigned int dropShift = 10 - n;
-         magnitude = _h;
-         magnitude >>= dropShift;
-         magnitude <<= dropShift;
-     }
-     // Reattach the original sign.
-     return half (FromBits, signBit | magnitude);
- }
- //------------------------------------------
- // Remaining inline members
- //------------------------------------------
- // Unary minus: the same pattern with the sign bit toggled.
- inline constexpr half
- half::operator-() const IMATH_NOEXCEPT
- {
-     return half (FromBits, _h ^ 0x8000);
- }
- // Assign from a float by converting it to a half first.
- inline half&
- half::operator= (float f) IMATH_NOEXCEPT
- {
-     return *this = half (f);
- }
- // Add h: the sum is formed in float, then converted back to half.
- inline half&
- half::operator+= (half h) IMATH_NOEXCEPT
- {
-     const float sum = static_cast<float> (*this) + static_cast<float> (h);
-     return *this = half (sum);
- }
- // Add f: the sum is formed in float, then converted back to half.
- inline half&
- half::operator+= (float f) IMATH_NOEXCEPT
- {
-     const float sum = static_cast<float> (*this) + f;
-     return *this = half (sum);
- }
- // Subtract h: the difference is formed in float, then converted back.
- inline half&
- half::operator-= (half h) IMATH_NOEXCEPT
- {
-     const float difference = static_cast<float> (*this) - static_cast<float> (h);
-     return *this = half (difference);
- }
- // Subtract f: the difference is formed in float, then converted back.
- inline half&
- half::operator-= (float f) IMATH_NOEXCEPT
- {
-     const float difference = static_cast<float> (*this) - f;
-     return *this = half (difference);
- }
- // Multiply by h: the product is formed in float, then converted back.
- inline half&
- half::operator*= (half h) IMATH_NOEXCEPT
- {
-     const float product = static_cast<float> (*this) * static_cast<float> (h);
-     return *this = half (product);
- }
- // Multiply by f: the product is formed in float, then converted back.
- inline half&
- half::operator*= (float f) IMATH_NOEXCEPT
- {
-     const float product = static_cast<float> (*this) * f;
-     return *this = half (product);
- }
- // Divide by h: the quotient is formed in float, then converted back.
- inline half&
- half::operator/= (half h) IMATH_NOEXCEPT
- {
-     const float quotient = static_cast<float> (*this) / static_cast<float> (h);
-     return *this = half (quotient);
- }
- // Divide by f: the quotient is formed in float, then converted back.
- inline half&
- half::operator/= (float f) IMATH_NOEXCEPT
- {
-     const float quotient = static_cast<float> (*this) / f;
-     return *this = half (quotient);
- }
- // The low 10 bits of the pattern (the stored significand).
- inline constexpr uint16_t
- half::mantissa() const IMATH_NOEXCEPT
- {
-     return static_cast<uint16_t> (_h & 0x03ff);
- }
- // The 5 exponent bits (bits 14..10), moved down to bits 4..0.
- inline constexpr uint16_t
- half::exponent() const IMATH_NOEXCEPT
- {
-     return static_cast<uint16_t> ((_h & 0x7c00) >> 10);
- }
- // Finite means the exponent field is not all ones.
- inline constexpr bool
- half::isFinite() const IMATH_NOEXCEPT
- {
-     return (_h & 0x7c00) != 0x7c00;
- }
- // Normalized means the exponent field is neither all zeros nor all ones.
- inline constexpr bool
- half::isNormalized() const IMATH_NOEXCEPT
- {
-     return exponent () != 0 && exponent () != 31;
- }
- // Denormalized means a zero exponent field with a non-zero significand.
- inline constexpr bool
- half::isDenormalized() const IMATH_NOEXCEPT
- {
-     return (_h & 0x7c00) == 0 && (_h & 0x03ff) != 0;
- }
- // Zero of either sign: every bit except the sign bit is clear.
- inline constexpr bool
- half::isZero() const IMATH_NOEXCEPT
- {
-     return !(_h & 0x7fff);
- }
- // NAN: all-ones exponent with a non-zero significand, i.e. the
- // sign-less pattern is strictly above the infinity pattern 0x7c00.
- inline constexpr bool
- half::isNan() const IMATH_NOEXCEPT
- {
-     return (_h & 0x7fff) > 0x7c00;
- }
- // Infinity of either sign: all-ones exponent with a zero significand.
- inline constexpr bool
- half::isInfinity() const IMATH_NOEXCEPT
- {
-     return (_h & 0x7fff) == 0x7c00;
- }
- // True when the sign bit (bit 15) is set.
- inline constexpr bool
- half::isNegative() const IMATH_NOEXCEPT
- {
-     return (_h >> 15) != 0;
- }
- // +infinity: sign clear, exponent all ones, significand zero (0x7c00).
- inline constexpr half
- half::posInf() IMATH_NOEXCEPT
- {
-     return half (FromBits, 0x0000 | 0x7c00);
- }
- // -infinity: sign set, exponent all ones, significand zero (0xfc00).
- inline constexpr half
- half::negInf() IMATH_NOEXCEPT
- {
-     return half (FromBits, 0x8000 | 0x7c00);
- }
- // NAN with every exponent and significand bit set (0x7fff).
- inline constexpr half
- half::qNan() IMATH_NOEXCEPT
- {
-     return half (FromBits, 0x7c00 | 0x03ff);
- }
- // NAN with the top significand bit clear and the other nine set (0x7dff).
- inline constexpr half
- half::sNan() IMATH_NOEXCEPT
- {
-     return half (FromBits, 0x7c00 | 0x01ff);
- }
- // Read access to the raw 16-bit pattern.
- inline constexpr uint16_t
- half::bits() const IMATH_NOEXCEPT
- {
-     return this->_h;
- }
- // Overwrite the raw 16-bit pattern; no conversion is applied.
- inline IMATH_CONSTEXPR14 void
- half::setBits (uint16_t bits) IMATH_NOEXCEPT
- {
-     this->_h = bits;
- }
- IMATH_INTERNAL_NAMESPACE_HEADER_EXIT
- /// Output h to os, formatted as a float (declared here; the definition is not in this header)
- IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
- /// Input h from is (declared here; the definition is not in this header)
- IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h);
- #include <limits>
- namespace std
- {
- /// Specialization of std::numeric_limits for the Imath half type.
- /// Every value is built from a fixed bit pattern or from one of the
- /// HALF_* macros, so all members are usable in constant expressions.
- ///
- /// NOTE(review): is_bounded and is_iec559 are false and traps is true.
- /// half has a finite value set and an IEEE 754 binary16 layout, so
- /// these look questionable, but they are kept unchanged here because
- /// callers may depend on them -- confirm before altering.
- template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half>
- {
-   public:
-     static constexpr bool is_specialized = true;
-     /// Smallest positive normalized half (bit pattern 0x0400)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0400); /*HALF_MIN*/
-     }
-     /// Largest finite half (bit pattern 0x7bff)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7bff); /*HALF_MAX*/
-     }
-     /// Most negative finite half (bit pattern 0xfbff)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half lowest () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0xfbff); /* -HALF_MAX */
-     }
-     static constexpr int digits = HALF_MANT_DIG;
-     static constexpr int digits10 = HALF_DIG;
-     static constexpr int max_digits10 = HALF_DECIMAL_DIG;
-     static constexpr bool is_signed = true;
-     static constexpr bool is_integer = false;
-     static constexpr bool is_exact = false;
-     static constexpr int radix = HALF_RADIX;
-     /// Difference between 1.0 and the next half (bit pattern 0x1400, 2^-10)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x1400); /*HALF_EPSILON*/
-     }
-     /// Maximum rounding error, 0.5 (bit pattern 0x3800)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x3800); /*0.5*/
-     }
-     static constexpr int min_exponent = HALF_DENORM_MIN_EXP;
-     static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP;
-     static constexpr int max_exponent = HALF_MAX_EXP;
-     static constexpr int max_exponent10 = HALF_MAX_10_EXP;
-     static constexpr bool has_infinity = true;
-     static constexpr bool has_quiet_NaN = true;
-     static constexpr bool has_signaling_NaN = true;
-     static constexpr float_denorm_style has_denorm = denorm_present;
-     static constexpr bool has_denorm_loss = false;
-     /// Positive infinity (bit pattern 0x7c00)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7c00); /*half::posInf()*/
-     }
-     /// Same bit pattern as half::qNan() (0x7fff)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7fff); /*half::qNan()*/
-     }
-     /// Same bit pattern as half::sNan() (0x7dff)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7dff); /*half::sNan()*/
-     }
-     /// Smallest positive denormalized half (bit pattern 0x0001)
-     static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT
-     {
-         return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0001); /*HALF_DENORM_MIN*/
-     }
-     static constexpr bool is_iec559 = false;
-     static constexpr bool is_bounded = false;
-     static constexpr bool is_modulo = false;
-     static constexpr bool traps = true;
-     static constexpr bool tinyness_before = false;
-     static constexpr float_round_style round_style = round_to_nearest;
- };
- } // namespace std
- //----------
- // Debugging helpers (declarations only; the definitions are not in this header)
- //----------
- IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h); // writes to a stream
- IMATH_EXPORT void printBits (std::ostream& os, float f); // writes to a stream
- IMATH_EXPORT void printBits (char c[19], IMATH_INTERNAL_NAMESPACE::half h); // presumably fills c with the bits as text; 19 likely 16 bits + 2 separators + NUL - confirm
- IMATH_EXPORT void printBits (char c[35], float f); // presumably fills c with the bits as text; 35 likely 32 bits + 2 separators + NUL - confirm
- # if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__) // not CUDA, and cuda_fp16 not already included
- using half = IMATH_INTERNAL_NAMESPACE::half; // expose the class as ::half
- # else
- # include <cuda_fp16.h> // no ::half alias on this path; the CUDA header is included instead
- # endif
- #endif // __cplusplus
- #endif // IMATH_HALF_H_
|