half.h 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990
  1. //
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Copyright Contributors to the OpenEXR Project.
  4. //
  5. //
  6. // Primary original authors:
  7. // Florian Kainz <kainz@ilm.com>
  8. // Rod Bogart <rgb@ilm.com>
  9. //
  10. #ifndef IMATH_HALF_H_
  11. #define IMATH_HALF_H_
  12. #include "ImathExport.h"
  13. #include "ImathNamespace.h"
  14. #include "ImathPlatform.h"
  15. /// @file half.h
  16. /// The half type is a 16-bit floating number, compatible with the
  17. /// IEEE 754-2008 binary16 type.
  18. ///
  19. /// **Representation of a 32-bit float:**
  20. ///
  21. /// We assume that a float, f, is an IEEE 754 single-precision
  22. /// floating point number, whose bits are arranged as follows:
  23. ///
  ///     31 (msb)
  ///     |
  ///     | 30     23
  ///     | |      |
  ///     | |      | 22                    0 (lsb)
  ///     | |      | |                     |
  ///     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
  ///
  ///     s e        m
  33. ///
  34. /// S is the sign-bit, e is the exponent and m is the significand.
  35. ///
  36. /// If e is between 1 and 254, f is a normalized number:
  37. ///
  ///     f = (-1)^s * 2^(e-127) * 1.m
  40. ///
  41. /// If e is 0, and m is not zero, f is a denormalized number:
  42. ///
  ///     f = (-1)^s * 2^(-126) * 0.m
  45. ///
  46. /// If e and m are both zero, f is zero:
  47. ///
  48. /// f = 0.0
  49. ///
  50. /// If e is 255, f is an "infinity" or "not a number" (NAN),
  51. /// depending on whether m is zero or not.
  52. ///
  53. /// Examples:
  54. ///
  55. /// 0 00000000 00000000000000000000000 = 0.0
  56. /// 0 01111110 00000000000000000000000 = 0.5
  57. /// 0 01111111 00000000000000000000000 = 1.0
  58. /// 0 10000000 00000000000000000000000 = 2.0
  59. /// 0 10000000 10000000000000000000000 = 3.0
  60. /// 1 10000101 11110000010000000000000 = -124.0625
  61. /// 0 11111111 00000000000000000000000 = +infinity
  62. /// 1 11111111 00000000000000000000000 = -infinity
  63. /// 0 11111111 10000000000000000000000 = NAN
  64. /// 1 11111111 11111111111111111111111 = NAN
  65. ///
  66. /// **Representation of a 16-bit half:**
  67. ///
  68. /// Here is the bit-layout for a half number, h:
  69. ///
  ///     15 (msb)
  ///     |
  ///     | 14  10
  ///     | |   |
  ///     | |   | 9        0 (lsb)
  ///     | |   | |        |
  ///     X XXXXX XXXXXXXXXX
  ///
  ///     s e     m
  79. ///
  80. /// S is the sign-bit, e is the exponent and m is the significand.
  81. ///
  82. /// If e is between 1 and 30, h is a normalized number:
  83. ///
  ///     h = (-1)^s * 2^(e-15) * 1.m
  86. ///
  87. /// If e is 0, and m is not zero, h is a denormalized number:
  88. ///
  ///     h = (-1)^s * 2^(-14) * 0.m
  91. ///
  92. /// If e and m are both zero, h is zero:
  93. ///
  94. /// h = 0.0
  95. ///
  96. /// If e is 31, h is an "infinity" or "not a number" (NAN),
  97. /// depending on whether m is zero or not.
  98. ///
  99. /// Examples:
  100. ///
  101. /// 0 00000 0000000000 = 0.0
  102. /// 0 01110 0000000000 = 0.5
  103. /// 0 01111 0000000000 = 1.0
  104. /// 0 10000 0000000000 = 2.0
  105. /// 0 10000 1000000000 = 3.0
  106. /// 1 10101 1111000001 = -124.0625
  107. /// 0 11111 0000000000 = +infinity
  108. /// 1 11111 0000000000 = -infinity
  109. /// 0 11111 1000000000 = NAN
  110. /// 1 11111 1111111111 = NAN
  111. ///
  112. /// **Conversion via Lookup Table:**
  113. ///
  114. /// Converting from half to float is performed by default using a
  115. /// lookup table. There are only 65,536 different half numbers; each
  116. /// of these numbers has been converted and stored in a table pointed
  117. /// to by the ``imath_half_to_float_table`` pointer.
  118. ///
  119. /// Prior to Imath v3.1, conversion from float to half was
  /// accomplished with the help of an exponent lookup table, but this is
  121. /// now replaced with explicit bit shifting.
  122. ///
  123. /// **Conversion via Hardware:**
  124. ///
  125. /// For Imath v3.1, the conversion routines have been extended to use
  126. /// F16C SSE instructions whenever present and enabled by compiler
  127. /// flags.
  128. ///
  129. /// **Conversion via Bit-Shifting**
  130. ///
  131. /// If F16C SSE instructions are not available, conversion can be
  132. /// accomplished by a bit-shifting algorithm. For half-to-float
  133. /// conversion, this is generally slower than the lookup table, but it
  134. /// may be preferable when memory limits preclude storing of the
  135. /// 65,536-entry lookup table.
  136. ///
  137. /// The lookup table symbol is included in the compilation even if
  138. /// ``IMATH_HALF_USE_LOOKUP_TABLE`` is false, because application code
  139. /// using the exported ``half.h`` may choose to enable the use of the table.
  140. ///
  141. /// An implementation can eliminate the table from compilation by
  142. /// defining the ``IMATH_HALF_NO_LOOKUP_TABLE`` preprocessor symbol.
  143. /// Simply add:
  144. ///
  145. /// #define IMATH_HALF_NO_LOOKUP_TABLE
  146. ///
  147. /// before including ``half.h``, or define the symbol on the compile
  148. /// command line.
  149. ///
  150. /// Furthermore, an implementation wishing to receive ``FE_OVERFLOW``
  151. /// and ``FE_UNDERFLOW`` floating point exceptions when converting
  152. /// float to half by the bit-shift algorithm can define the
  153. /// preprocessor symbol ``IMATH_HALF_ENABLE_FP_EXCEPTIONS`` prior to
  154. /// including ``half.h``:
  155. ///
  156. /// #define IMATH_HALF_ENABLE_FP_EXCEPTIONS
  157. ///
  158. /// **Conversion Performance Comparison:**
  159. ///
  160. /// Testing on a Core i9, the timings are approximately:
  161. ///
  162. /// half to float
  163. /// - table: 0.71 ns / call
  164. /// - no table: 1.06 ns / call
  165. /// - f16c: 0.45 ns / call
  166. ///
  167. /// float-to-half:
  168. /// - original: 5.2 ns / call
  169. /// - no exp table + opt: 1.27 ns / call
  170. /// - f16c: 0.45 ns / call
  171. ///
  172. /// **Note:** the timing above depends on the distribution of the
  173. /// floats in question.
  174. ///
  175. #ifdef __CUDA_ARCH__
  176. // do not include intrinsics headers on Cuda
  177. #elif defined(_WIN32)
  178. # include <intrin.h>
  179. #elif defined(__x86_64__)
  180. # include <x86intrin.h>
  181. #elif defined(__F16C__)
  182. # include <immintrin.h>
  183. #endif
  184. #include <stdint.h>
  185. #include <stdio.h>
  186. #ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
  187. # include <fenv.h>
  188. #endif
  189. //-------------------------------------------------------------------------
  190. // Limits
  191. //
  192. // Visual C++ will complain if HALF_DENORM_MIN, HALF_NRM_MIN etc. are not float
  193. // constants, but at least one other compiler (gcc 2.96) produces incorrect
  194. // results if they are.
  195. //-------------------------------------------------------------------------
  196. #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
  197. /// Smallest positive denormalized half
  198. # define HALF_DENORM_MIN 5.96046448e-08f
  199. /// Smallest positive normalized half
  200. # define HALF_NRM_MIN 6.10351562e-05f
  201. /// Smallest positive normalized half
  202. # define HALF_MIN 6.10351562e-05f
  203. /// Largest positive half
  204. # define HALF_MAX 65504.0f
  205. /// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
  206. # define HALF_EPSILON 0.00097656f
  207. #else
  208. /// Smallest positive denormalized half
  209. # define HALF_DENORM_MIN 5.96046448e-08
  210. /// Smallest positive normalized half
  211. # define HALF_NRM_MIN 6.10351562e-05
  212. /// Smallest positive normalized half
  213. # define HALF_MIN 6.10351562e-05f
  214. /// Largest positive half
  215. # define HALF_MAX 65504.0
  216. /// Smallest positive e for which ``half(1.0 + e) != half(1.0)``
  217. # define HALF_EPSILON 0.00097656
  218. #endif
  219. /// Number of digits in mantissa (significand + hidden leading 1)
  220. #define HALF_MANT_DIG 11
  221. /// Number of base 10 digits that can be represented without change:
  222. ///
  223. /// ``floor( (HALF_MANT_DIG - 1) * log10(2) ) => 3.01... -> 3``
  224. #define HALF_DIG 3
  225. /// Number of base-10 digits that are necessary to uniquely represent
  226. /// all distinct values:
  227. ///
  228. /// ``ceil(HALF_MANT_DIG * log10(2) + 1) => 4.31... -> 5``
  229. #define HALF_DECIMAL_DIG 5
  230. /// Base of the exponent
  231. #define HALF_RADIX 2
  232. /// Minimum negative integer such that ``HALF_RADIX`` raised to the power
  233. /// of one less than that integer is a normalized half
  234. #define HALF_DENORM_MIN_EXP -13
  235. /// Maximum positive integer such that ``HALF_RADIX`` raised to the power
  236. /// of one less than that integer is a normalized half
  237. #define HALF_MAX_EXP 16
  /// Minimum negative integer such that 10 raised to that power is a
  /// normalized half
  240. #define HALF_DENORM_MIN_10_EXP -4
  241. /// Maximum positive integer such that 10 raised to that power is a
  242. /// normalized half
  243. #define HALF_MAX_10_EXP 4
  244. /// a type for both C-only programs and C++ to use the same utilities
  245. typedef union imath_half_uif
  246. {
  247. uint32_t i;
  248. float f;
  249. } imath_half_uif_t;
  250. /// a type for both C-only programs and C++ to use the same utilities
  251. typedef uint16_t imath_half_bits_t;
  252. #if !defined(__cplusplus) && !defined(__CUDACC__)
  253. /// if we're in a C-only context, alias the half bits type to half
  254. typedef imath_half_bits_t half;
  255. #endif
  256. #if !defined(IMATH_HALF_NO_LOOKUP_TABLE)
  257. # if defined(__cplusplus)
  258. extern "C"
  259. # else
  260. extern
  261. # endif
  262. IMATH_EXPORT const imath_half_uif_t* imath_half_to_float_table;
  263. #endif
  264. ///
  265. /// Convert half to float
  266. ///
  267. static inline float
  268. imath_half_to_float (imath_half_bits_t h)
  269. {
  270. #if defined(__F16C__)
  271. // NB: The intel implementation does seem to treat NaN slightly
  272. // different than the original toFloat table does (i.e. where the
  273. // 1 bits are, meaning the signalling or not bits). This seems
  274. // benign, given that the original library didn't really deal with
  275. // signalling vs non-signalling NaNs
  276. # ifdef _MSC_VER
  277. /* msvc does not seem to have cvtsh_ss :( */
  278. return _mm_cvtss_f32 (_mm_cvtph_ps (_mm_set1_epi16 (h)));
  279. # else
  280. return _cvtsh_ss (h);
  281. # endif
  282. #elif defined(IMATH_HALF_USE_LOOKUP_TABLE) && !defined(IMATH_HALF_NO_LOOKUP_TABLE)
  283. return imath_half_to_float_table[h].f;
  284. #else
  285. imath_half_uif_t v;
  286. // this code would be clearer, although it does appear to be faster
  287. // (1.06 vs 1.08 ns/call) to avoid the constants and just do 4
  288. // shifts.
  289. //
  290. uint32_t hexpmant = ( (uint32_t)(h) << 17 ) >> 4;
  291. v.i = ((uint32_t)(h >> 15)) << 31;
  292. // the likely really does help if most of your numbers are "normal" half numbers
  293. if (IMATH_LIKELY ((hexpmant >= 0x00800000)))
  294. {
  295. v.i |= hexpmant;
  296. // either we are a normal number, in which case add in the bias difference
  297. // otherwise make sure all exponent bits are set
  298. if (IMATH_LIKELY ((hexpmant < 0x0f800000)))
  299. v.i += 0x38000000;
  300. else
  301. v.i |= 0x7f800000;
  302. }
  303. else if (hexpmant != 0)
  304. {
  305. // exponent is 0 because we're denormal, don't have to extract
  306. // the mantissa, can just use as is
  307. //
  308. //
  309. // other compilers may provide count-leading-zeros primitives,
  310. // but we need the community to inform us of the variants
  311. uint32_t lc;
  312. # if defined(_MSC_VER) && (_M_IX86 || _M_X64)
  313. lc = __lzcnt (hexpmant);
  314. # elif defined(__GNUC__) || defined(__clang__)
  315. lc = (uint32_t) __builtin_clz (hexpmant);
  316. # else
  317. lc = 0;
  318. while (0 == ((hexpmant << lc) & 0x80000000))
  319. ++lc;
  320. # endif
  321. lc -= 8;
  322. // so nominally we want to remove that extra bit we shifted
  323. // up, but we are going to add that bit back in, then subtract
  324. // from it with the 0x38800000 - (lc << 23)....
  325. //
  326. // by combining, this allows us to skip the & operation (and
  327. // remove a constant)
  328. //
  329. // hexpmant &= ~0x00800000;
  330. v.i |= 0x38800000;
  331. // lc is now x, where the desired exponent is then
  332. // -14 - lc
  333. // + 127 -> new exponent
  334. v.i |= (hexpmant << lc);
  335. v.i -= (lc << 23);
  336. }
  337. return v.f;
  338. #endif
  339. }
  340. ///
  341. /// Convert half to float
  342. ///
  343. /// Note: This only supports the "round to even" rounding mode, which
  344. /// was the only mode supported by the original OpenEXR library
  345. ///
  346. static inline imath_half_bits_t
  347. imath_float_to_half (float f)
  348. {
  349. #if defined(__F16C__)
  350. # ifdef _MSC_VER
  351. // msvc does not seem to have cvtsh_ss :(
  352. return _mm_extract_epi16 (
  353. _mm_cvtps_ph (_mm_set_ss (f), (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)),
  354. 0);
  355. # else
  356. // preserve the fixed rounding mode to nearest
  357. return _cvtss_sh (f, (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  358. # endif
  359. #else
  360. imath_half_uif_t v;
  361. imath_half_bits_t ret;
  362. uint32_t e, m, ui, r, shift;
  363. v.f = f;
  364. ui = (v.i & ~0x80000000);
  365. ret = ((v.i >> 16) & 0x8000);
  366. // exponent large enough to result in a normal number, round and return
  367. if (ui >= 0x38800000)
  368. {
  369. // inf or nan
  370. if (IMATH_UNLIKELY (ui >= 0x7f800000))
  371. {
  372. ret |= 0x7c00;
  373. if (ui == 0x7f800000)
  374. return ret;
  375. m = (ui & 0x7fffff) >> 13;
  376. // make sure we have at least one bit after shift to preserve nan-ness
  377. return ret | (uint16_t)m | (uint16_t)(m == 0);
  378. }
  379. // too large, round to infinity
  380. if (IMATH_UNLIKELY (ui > 0x477fefff))
  381. {
  382. # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
  383. feraiseexcept (FE_OVERFLOW);
  384. # endif
  385. return ret | 0x7c00;
  386. }
  387. ui -= 0x38000000;
  388. ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13);
  389. return ret | (uint16_t)ui;
  390. }
  391. // zero or flush to 0
  392. if (ui < 0x33000001)
  393. {
  394. # ifdef IMATH_HALF_ENABLE_FP_EXCEPTIONS
  395. if (ui == 0)
  396. return ret;
  397. feraiseexcept (FE_UNDERFLOW);
  398. # endif
  399. return ret;
  400. }
  401. // produce a denormalized half
  402. e = (ui >> 23);
  403. shift = 0x7e - e;
  404. m = 0x800000 | (ui & 0x7fffff);
  405. r = m << (32 - shift);
  406. ret |= (m >> shift);
  407. if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0))
  408. ++ret;
  409. return ret;
  410. #endif
  411. }
  412. ////////////////////////////////////////
  413. #ifdef __cplusplus
  414. # include <iostream>
  415. IMATH_INTERNAL_NAMESPACE_HEADER_ENTER
  416. ///
  417. ///
  418. /// class half represents a 16-bit floating point number
  419. ///
  420. /// Type half can represent positive and negative numbers whose
  421. /// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
  422. /// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
  423. /// with an absolute error of 6.0e-8. All integers from -2048 to
  424. /// +2048 can be represented exactly.
  425. ///
  426. /// Type half behaves (almost) like the built-in C++ floating point
  427. /// types. In arithmetic expressions, half, float and double can be
  428. /// mixed freely. Here are a few examples:
  429. ///
  430. /// half a (3.5);
  431. /// float b (a + sqrt (a));
  432. /// a += b;
  433. /// b += a;
  434. /// b = a + 7;
  435. ///
  436. /// Conversions from half to float are lossless; all half numbers
  437. /// are exactly representable as floats.
  438. ///
  439. /// Conversions from float to half may not preserve a float's value
  440. /// exactly. If a float is not representable as a half, then the
  441. /// float value is rounded to the nearest representable half. If a
  442. /// float value is exactly in the middle between the two closest
  443. /// representable half values, then the float value is rounded to
  444. /// the closest half whose least significant bit is zero.
  445. ///
  446. /// Overflows during float-to-half conversions cause arithmetic
  447. /// exceptions. An overflow occurs when the float value to be
  448. /// converted is too large to be represented as a half, or if the
  449. /// float value is an infinity or a NAN.
  450. ///
  451. /// The implementation of type half makes the following assumptions
  452. /// about the implementation of the built-in C++ types:
  453. ///
  454. /// * float is an IEEE 754 single-precision number
  455. /// * sizeof (float) == 4
  456. /// * sizeof (unsigned int) == sizeof (float)
  457. /// * alignof (unsigned int) == alignof (float)
  458. /// * sizeof (uint16_t) == 2
  459. ///
  460. class IMATH_EXPORT_TYPE half
  461. {
  462. public:
  463. /// A special tag that lets us initialize a half from the raw bits.
  464. enum IMATH_EXPORT_ENUM FromBitsTag
  465. {
  466. FromBits
  467. };
  468. /// @{
  469. /// @name Constructors
  470. /// Default construction provides no initialization (hence it is
  471. /// not constexpr).
  472. half() IMATH_NOEXCEPT = default;
  473. /// Construct from float
  474. half (float f) IMATH_NOEXCEPT;
  475. /// Construct from bit-vector
  476. constexpr half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT;
  477. /// Copy constructor
  478. constexpr half (const half&) IMATH_NOEXCEPT = default;
  479. /// Move constructor
  480. constexpr half (half&&) IMATH_NOEXCEPT = default;
  481. /// Destructor
  482. ~half() IMATH_NOEXCEPT = default;
  483. /// @}
  484. /// Conversion to float
  485. operator float() const IMATH_NOEXCEPT;
  486. /// @{
  487. /// @name Basic Algebra
  488. /// Unary minus
  489. constexpr half operator-() const IMATH_NOEXCEPT;
  490. /// Assignment
  491. half& operator= (const half& h) IMATH_NOEXCEPT = default;
  492. /// Move assignment
  493. half& operator= (half&& h) IMATH_NOEXCEPT = default;
  494. /// Assignment from float
  495. half& operator= (float f) IMATH_NOEXCEPT;
  496. /// Addition assignment
  497. half& operator+= (half h) IMATH_NOEXCEPT;
  498. /// Addition assignment from float
  499. half& operator+= (float f) IMATH_NOEXCEPT;
  500. /// Subtraction assignment
  501. half& operator-= (half h) IMATH_NOEXCEPT;
  502. /// Subtraction assignment from float
  503. half& operator-= (float f) IMATH_NOEXCEPT;
  504. /// Multiplication assignment
  505. half& operator*= (half h) IMATH_NOEXCEPT;
  506. /// Multiplication assignment from float
  507. half& operator*= (float f) IMATH_NOEXCEPT;
  508. /// Division assignment
  509. half& operator/= (half h) IMATH_NOEXCEPT;
  510. /// Division assignment from float
  511. half& operator/= (float f) IMATH_NOEXCEPT;
  512. /// @}
  513. /// Round to n-bit precision (n should be between 0 and 10).
  514. /// After rounding, the significand's 10-n least significant
  515. /// bits will be zero.
  516. IMATH_CONSTEXPR14 half round (unsigned int n) const IMATH_NOEXCEPT;
  517. /// @{
  518. /// @name Classification
  519. /// Return true if a normalized number, a denormalized number, or
  520. /// zero.
  521. constexpr bool isFinite() const IMATH_NOEXCEPT;
  522. /// Return true if a normalized number.
  523. constexpr bool isNormalized() const IMATH_NOEXCEPT;
  524. /// Return true if a denormalized number.
  525. constexpr bool isDenormalized() const IMATH_NOEXCEPT;
  526. /// Return true if zero.
  527. constexpr bool isZero() const IMATH_NOEXCEPT;
  528. /// Return true if NAN.
  529. constexpr bool isNan() const IMATH_NOEXCEPT;
  530. /// Return true if a positive or a negative infinity
  531. constexpr bool isInfinity() const IMATH_NOEXCEPT;
  532. /// Return true if the sign bit is set (negative)
  533. constexpr bool isNegative() const IMATH_NOEXCEPT;
  534. /// @}
  535. /// @{
  536. /// @name Special values
  537. /// Return +infinity
  538. static constexpr half posInf() IMATH_NOEXCEPT;
  539. /// Return -infinity
  540. static constexpr half negInf() IMATH_NOEXCEPT;
  541. /// Returns a NAN with the bit pattern 0111111111111111
  542. static constexpr half qNan() IMATH_NOEXCEPT;
  543. /// Return a NAN with the bit pattern 0111110111111111
  544. static constexpr half sNan() IMATH_NOEXCEPT;
  545. /// @}
  546. /// @{
  547. /// @name Access to the internal representation
  548. /// Return the bit pattern
  549. constexpr uint16_t bits () const IMATH_NOEXCEPT;
  550. /// Set the bit pattern
  551. IMATH_CONSTEXPR14 void setBits (uint16_t bits) IMATH_NOEXCEPT;
  552. /// @}
  553. public:
  554. static_assert (sizeof (float) == sizeof (uint32_t),
  555. "Assumption about the size of floats correct");
  556. using uif = imath_half_uif;
  557. private:
  558. constexpr uint16_t mantissa() const IMATH_NOEXCEPT;
  559. constexpr uint16_t exponent() const IMATH_NOEXCEPT;
  560. uint16_t _h;
  561. };
  562. //----------------------------
  563. // Half-from-float constructor
  564. //----------------------------
  565. inline half::half (float f) IMATH_NOEXCEPT
  566. : _h (imath_float_to_half (f))
  567. {
  568. }
  569. //------------------------------------------
  570. // Half from raw bits constructor
  571. //------------------------------------------
  572. inline constexpr half::half (FromBitsTag, uint16_t bits) IMATH_NOEXCEPT : _h (bits)
  573. {}
  574. //-------------------------
  575. // Half-to-float conversion
  576. //-------------------------
  577. inline half::operator float() const IMATH_NOEXCEPT
  578. {
  579. return imath_half_to_float (_h);
  580. }
  581. //-------------------------
  582. // Round to n-bit precision
  583. //-------------------------
  584. inline IMATH_CONSTEXPR14 half
  585. half::round (unsigned int n) const IMATH_NOEXCEPT
  586. {
  587. //
  588. // Parameter check.
  589. //
  590. if (n >= 10)
  591. return *this;
  592. //
  593. // Disassemble h into the sign, s,
  594. // and the combined exponent and significand, e.
  595. //
  596. uint16_t s = _h & 0x8000;
  597. uint16_t e = _h & 0x7fff;
  598. //
  599. // Round the exponent and significand to the nearest value
  600. // where ones occur only in the (10-n) most significant bits.
  601. // Note that the exponent adjusts automatically if rounding
  602. // up causes the significand to overflow.
  603. //
  604. e >>= 9 - n;
  605. e += e & 1;
  606. e <<= 9 - n;
  607. //
  608. // Check for exponent overflow.
  609. //
  610. if (e >= 0x7c00)
  611. {
  612. //
  613. // Overflow occurred -- truncate instead of rounding.
  614. //
  615. e = _h;
  616. e >>= 10 - n;
  617. e <<= 10 - n;
  618. }
  619. //
  620. // Put the original sign bit back.
  621. //
  622. half h (FromBits, s | e);
  623. return h;
  624. }
  625. //-----------------------
  626. // Other inline functions
  627. //-----------------------
  628. inline constexpr half
  629. half::operator-() const IMATH_NOEXCEPT
  630. {
  631. return half (FromBits, bits() ^ 0x8000);
  632. }
  633. inline half&
  634. half::operator= (float f) IMATH_NOEXCEPT
  635. {
  636. *this = half (f);
  637. return *this;
  638. }
  639. inline half&
  640. half::operator+= (half h) IMATH_NOEXCEPT
  641. {
  642. *this = half (float (*this) + float (h));
  643. return *this;
  644. }
  645. inline half&
  646. half::operator+= (float f) IMATH_NOEXCEPT
  647. {
  648. *this = half (float (*this) + f);
  649. return *this;
  650. }
  651. inline half&
  652. half::operator-= (half h) IMATH_NOEXCEPT
  653. {
  654. *this = half (float (*this) - float (h));
  655. return *this;
  656. }
  657. inline half&
  658. half::operator-= (float f) IMATH_NOEXCEPT
  659. {
  660. *this = half (float (*this) - f);
  661. return *this;
  662. }
  663. inline half&
  664. half::operator*= (half h) IMATH_NOEXCEPT
  665. {
  666. *this = half (float (*this) * float (h));
  667. return *this;
  668. }
  669. inline half&
  670. half::operator*= (float f) IMATH_NOEXCEPT
  671. {
  672. *this = half (float (*this) * f);
  673. return *this;
  674. }
  675. inline half&
  676. half::operator/= (half h) IMATH_NOEXCEPT
  677. {
  678. *this = half (float (*this) / float (h));
  679. return *this;
  680. }
  681. inline half&
  682. half::operator/= (float f) IMATH_NOEXCEPT
  683. {
  684. *this = half (float (*this) / f);
  685. return *this;
  686. }
  687. inline constexpr uint16_t
  688. half::mantissa() const IMATH_NOEXCEPT
  689. {
  690. return _h & 0x3ff;
  691. }
  692. inline constexpr uint16_t
  693. half::exponent() const IMATH_NOEXCEPT
  694. {
  695. return (_h >> 10) & 0x001f;
  696. }
  697. inline constexpr bool
  698. half::isFinite() const IMATH_NOEXCEPT
  699. {
  700. return exponent() < 31;
  701. }
  702. inline constexpr bool
  703. half::isNormalized() const IMATH_NOEXCEPT
  704. {
  705. return exponent() > 0 && exponent() < 31;
  706. }
  707. inline constexpr bool
  708. half::isDenormalized() const IMATH_NOEXCEPT
  709. {
  710. return exponent() == 0 && mantissa() != 0;
  711. }
  712. inline constexpr bool
  713. half::isZero() const IMATH_NOEXCEPT
  714. {
  715. return (_h & 0x7fff) == 0;
  716. }
  717. inline constexpr bool
  718. half::isNan() const IMATH_NOEXCEPT
  719. {
  720. return exponent() == 31 && mantissa() != 0;
  721. }
  722. inline constexpr bool
  723. half::isInfinity() const IMATH_NOEXCEPT
  724. {
  725. return exponent() == 31 && mantissa() == 0;
  726. }
  727. inline constexpr bool
  728. half::isNegative() const IMATH_NOEXCEPT
  729. {
  730. return (_h & 0x8000) != 0;
  731. }
  732. inline constexpr half
  733. half::posInf() IMATH_NOEXCEPT
  734. {
  735. return half (FromBits, 0x7c00);
  736. }
  737. inline constexpr half
  738. half::negInf() IMATH_NOEXCEPT
  739. {
  740. return half (FromBits, 0xfc00);
  741. }
  742. inline constexpr half
  743. half::qNan() IMATH_NOEXCEPT
  744. {
  745. return half (FromBits, 0x7fff);
  746. }
  747. inline constexpr half
  748. half::sNan() IMATH_NOEXCEPT
  749. {
  750. return half (FromBits, 0x7dff);
  751. }
  752. inline constexpr uint16_t
  753. half::bits() const IMATH_NOEXCEPT
  754. {
  755. return _h;
  756. }
  757. inline IMATH_CONSTEXPR14 void
  758. half::setBits (uint16_t bits) IMATH_NOEXCEPT
  759. {
  760. _h = bits;
  761. }
  762. IMATH_INTERNAL_NAMESPACE_HEADER_EXIT
  763. /// Output h to os, formatted as a float
  764. IMATH_EXPORT std::ostream& operator<< (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
  765. /// Input h from is
  766. IMATH_EXPORT std::istream& operator>> (std::istream& is, IMATH_INTERNAL_NAMESPACE::half& h);
  767. #include <limits>
  768. namespace std
  769. {
  770. template <> class numeric_limits<IMATH_INTERNAL_NAMESPACE::half>
  771. {
  772. public:
  773. static const bool is_specialized = true;
  774. static constexpr IMATH_INTERNAL_NAMESPACE::half min () IMATH_NOEXCEPT
  775. {
  776. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0400); /*HALF_MIN*/
  777. }
  778. static constexpr IMATH_INTERNAL_NAMESPACE::half max () IMATH_NOEXCEPT
  779. {
  780. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7bff); /*HALF_MAX*/
  781. }
  782. static constexpr IMATH_INTERNAL_NAMESPACE::half lowest ()
  783. {
  784. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0xfbff); /* -HALF_MAX */
  785. }
  786. static constexpr int digits = HALF_MANT_DIG;
  787. static constexpr int digits10 = HALF_DIG;
  788. static constexpr int max_digits10 = HALF_DECIMAL_DIG;
  789. static constexpr bool is_signed = true;
  790. static constexpr bool is_integer = false;
  791. static constexpr bool is_exact = false;
  792. static constexpr int radix = HALF_RADIX;
  793. static constexpr IMATH_INTERNAL_NAMESPACE::half epsilon () IMATH_NOEXCEPT
  794. {
  795. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x1400); /*HALF_EPSILON*/
  796. }
  797. static constexpr IMATH_INTERNAL_NAMESPACE::half round_error () IMATH_NOEXCEPT
  798. {
  799. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x3800); /*0.5*/
  800. }
  801. static constexpr int min_exponent = HALF_DENORM_MIN_EXP;
  802. static constexpr int min_exponent10 = HALF_DENORM_MIN_10_EXP;
  803. static constexpr int max_exponent = HALF_MAX_EXP;
  804. static constexpr int max_exponent10 = HALF_MAX_10_EXP;
  805. static constexpr bool has_infinity = true;
  806. static constexpr bool has_quiet_NaN = true;
  807. static constexpr bool has_signaling_NaN = true;
  808. static constexpr float_denorm_style has_denorm = denorm_present;
  809. static constexpr bool has_denorm_loss = false;
  810. static constexpr IMATH_INTERNAL_NAMESPACE::half infinity () IMATH_NOEXCEPT
  811. {
  812. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7c00); /*half::posInf()*/
  813. }
  814. static constexpr IMATH_INTERNAL_NAMESPACE::half quiet_NaN () IMATH_NOEXCEPT
  815. {
  816. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7fff); /*half::qNan()*/
  817. }
  818. static constexpr IMATH_INTERNAL_NAMESPACE::half signaling_NaN () IMATH_NOEXCEPT
  819. {
  820. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x7dff); /*half::sNan()*/
  821. }
  822. static constexpr IMATH_INTERNAL_NAMESPACE::half denorm_min () IMATH_NOEXCEPT
  823. {
  824. return IMATH_INTERNAL_NAMESPACE::half (IMATH_INTERNAL_NAMESPACE::half::FromBits, 0x0001); /*HALF_DENORM_MIN*/
  825. }
  826. static constexpr bool is_iec559 = false;
  827. static constexpr bool is_bounded = false;
  828. static constexpr bool is_modulo = false;
  829. static constexpr bool traps = true;
  830. static constexpr bool tinyness_before = false;
  831. static constexpr float_round_style round_style = round_to_nearest;
  832. };
  833. } // namespace std
  834. //----------
  835. // Debugging
  836. //----------
  837. IMATH_EXPORT void printBits (std::ostream& os, IMATH_INTERNAL_NAMESPACE::half h);
  838. IMATH_EXPORT void printBits (std::ostream& os, float f);
  839. IMATH_EXPORT void printBits (char c[19], IMATH_INTERNAL_NAMESPACE::half h);
  840. IMATH_EXPORT void printBits (char c[35], float f);
  841. # if !defined(__CUDACC__) && !defined(__CUDA_FP16_HPP__)
  842. using half = IMATH_INTERNAL_NAMESPACE::half;
  843. # else
  844. # include <cuda_fp16.h>
  845. # endif
  846. #endif // __cplusplus
  847. #endif // IMATH_HALF_H_