1+ //! Support for floating point types compatible with IEEE 754.
2+
13use crate :: { Category , ExpInt , IEK_INF , IEK_NAN , IEK_ZERO } ;
24use crate :: { Float , FloatConvert , ParseError , Round , Status , StatusAnd } ;
35
@@ -8,6 +10,12 @@ use core::marker::PhantomData;
810use core:: mem;
911use core:: ops:: Neg ;
1012
13+ /// A floating point number that uses IEEE semantics.
14+ ///
15+ /// Usually you will want to use the available type aliases of this type
16+ /// (e.g., [`Single`], [`Double`]) rather than referencing it directly.
17+ ///
18+ /// If `S` implements [`Semantics`], this type will implement [`Float`].
1119#[ must_use]
1220pub struct IeeeFloat < S > {
1321 /// Absolute significand value (including the integer bit).
@@ -84,7 +92,7 @@ pub enum NonfiniteBehavior {
8492 /// Only the Float8E4M3FN has this behavior. There is no Inf representation. A
8593 /// value is NaN if the exponent field and the mantissa field are all 1s.
8694 /// This behavior matches the FP8 E4M3 type described in
87- /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
95+ /// <https://arxiv.org/abs/2209.05433>. We treat both signed and unsigned NaNs
8896 /// as non-signalling, although the paper does not state whether the NaN
8997 /// values are signalling or not.
9098 NanOnly ,
@@ -276,46 +284,75 @@ impl<S> Clone for IeeeFloat<S> {
276284}
277285
278286macro_rules! ieee_semantics {
279- ( $( $name: ident = $sem: ident( $bits: tt : $exp_bits: tt) $( { $( $extra: tt) * } ) ?) ,* $( , ) ?) => {
280- $( pub struct $sem; ) *
281- $( pub type $name = IeeeFloat <$sem>; ) *
282- $( impl Semantics for $sem {
283- const BITS : usize = $bits;
284- const EXP_BITS : usize = $exp_bits;
287+ ( $(
288+ $( #[ $meta: meta] ) *
289+ $name: ident = $sem: ident( $bits: tt : $exp_bits: tt) $( { $( $extra: tt) * } ) ?
290+ ) ,* $( , ) ?) => {
291+ $(
292+ #[ doc = concat!( "Floating point semantics for [`" , stringify!( $name) , "`]." ) ]
293+ ///
294+ /// See that type for more details.
295+ pub struct $sem;
296+
297+ $( #[ $meta] ) *
298+ pub type $name = IeeeFloat <$sem>;
285299
286- $( $( $extra) * ) ?
287- } ) *
300+ impl Semantics for $sem {
301+ const BITS : usize = $bits;
302+ const EXP_BITS : usize = $exp_bits;
303+
304+ $( $( $extra) * ) ?
305+ }
306+ ) *
288307 }
289308}
290309
291310ieee_semantics ! {
311+ /// IEEE binary16 half-precision (16-bit) floating point number.
292312 Half = HalfS ( 16 : 5 ) ,
313+
314+ /// IEEE binary32 single-precision (32-bit) floating point number.
293315 Single = SingleS ( 32 : 8 ) ,
316+
317+ /// IEEE binary64 double-precision (64-bit) floating point number.
294318 Double = DoubleS ( 64 : 11 ) ,
295- Quad = QuadS ( 128 : 15 ) ,
296319
297- // Non-standard IEEE-like semantics:
320+ /// IEEE binary128 quadruple-precision (128-bit) floating point number.
321+ Quad = QuadS ( 128 : 15 ) ,
298322
299- // FIXME(eddyb) document this as "Brain Float 16" (C++ didn't have docs).
323+ /// 16-bit brain floating point number.
324+ ///
325+ /// This is not an IEEE kind but uses the same semantics.
300326 BFloat = BFloatS ( 16 : 8 ) ,
301327
302- // 8-bit floating point number following IEEE-754 conventions with bit
303- // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
328+ /// 8-bit floating point number with S1E5M2 bit layout.
329+ ///
330+ /// Follows IEEE-754 conventions, with the S1E5M2 bit layout described in
331+ /// <https://arxiv.org/abs/2209.05433>.
304332 Float8E5M2 = Float8E5M2S ( 8 : 5 ) ,
305333
306- // 8-bit floating point number mostly following IEEE-754 conventions with
307- // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
308- // Unlike IEEE-754 types, there are no infinity values, and NaN is
309- // represented with the exponent and mantissa bits set to all 1s.
334+ /// 8-bit floating point number with S1E4M3 bit layout.
335+ ///
336+ /// This type mostly follows IEEE-754 conventions, with the
337+ /// bit layout S1E4M3 as described in <https://arxiv.org/abs/2209.05433>.
338+ /// Unlike IEEE-754 types, there are no infinity values, and NaN is
339+ /// represented with the exponent and mantissa bits set to all 1s.
310340 Float8E4M3FN = Float8E4M3FNS ( 8 : 4 ) {
311341 const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: NanOnly ;
312342 } ,
313343}
314344
315345// FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit"
316346// associated `const` on `Semantics` itself.
347+ /// Floating point semantics for [`X87DoubleExtended`].
348+ ///
349+ /// See that type for more details.
317350pub struct X87DoubleExtendedS ;
351+
352+ /// 80-bit floating point number that uses IEEE extended precision semantics, as used
353+ /// by x87 `long double`.
318354pub type X87DoubleExtended = IeeeFloat < X87DoubleExtendedS > ;
355+
319356impl Semantics for X87DoubleExtendedS {
320357 const BITS : usize = 80 ;
321358 const EXP_BITS : usize = 15 ;
0 commit comments