2023-07-06 15:01:23 +01:00
// Copyright 2019 Montgomery Edwards⁴⁴⁸ and Faye Amacker
package half_test
import (
"bytes"
"crypto/sha512"
"encoding/binary"
"encoding/hex"
"fmt"
"math"
"testing"
2024-04-21 15:15:00 +01:00
float16 "git.andr3h3nriqu3s.com/andr3/gotch/half"
2023-07-06 15:01:23 +01:00
)
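// wantF32toF16bits pairs float32 inputs (in) with the float16 bit patterns (out)
// that Fromfloat32 is expected to return; it is a tiny subset of all float32 values.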
var wantF32toF16bits = [ ] struct {
in float32
out uint16
} {
// generated to provide 100% code coverage plus additional tests for rounding, etc.
{ in : math . Float32frombits ( 0x00000000 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00000001 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00001fff ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00002000 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00003fff ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00004000 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x007fffff ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x00800000 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x33000000 ) , out : 0x0000 } , // in f32=0.000000, out f16=0
{ in : math . Float32frombits ( 0x33000001 ) , out : 0x0001 } , // in f32=0.000000, out f16=0.000000059604645
{ in : math . Float32frombits ( 0x33000002 ) , out : 0x0001 } , // in f32=0.000000, out f16=0.000000059604645
{ in : math . Float32frombits ( 0x387fc000 ) , out : 0x03ff } , // in f32=0.000061, out f16=0.00006097555 // exp32=-15 (underflows binary16 exp) but round-trips
{ in : math . Float32frombits ( 0x387fffff ) , out : 0x0400 } , // in f32=0.000061, out f16=0.000061035156
{ in : math . Float32frombits ( 0x38800000 ) , out : 0x0400 } , // in f32=0.000061, out f16=0.000061035156
{ in : math . Float32frombits ( 0x38801fff ) , out : 0x0401 } , // in f32=0.000061, out f16=0.00006109476
{ in : math . Float32frombits ( 0x38802000 ) , out : 0x0401 } , // in f32=0.000061, out f16=0.00006109476
{ in : math . Float32frombits ( 0x38803fff ) , out : 0x0402 } , // in f32=0.000061, out f16=0.000061154366
{ in : math . Float32frombits ( 0x38804000 ) , out : 0x0402 } , // in f32=0.000061, out f16=0.000061154366
{ in : math . Float32frombits ( 0x33bfffff ) , out : 0x0001 } , // in f32=0.000000, out f16=0.000000059604645
{ in : math . Float32frombits ( 0x33c00000 ) , out : 0x0002 } , // in f32=0.000000, out f16=0.00000011920929
{ in : math . Float32frombits ( 0x33c00001 ) , out : 0x0002 } , // in f32=0.000000, out f16=0.00000011920929
{ in : math . Float32frombits ( 0x477fffff ) , out : 0x7c00 } , // in f32=65535.996094, out f16=+Inf
{ in : math . Float32frombits ( 0x47800000 ) , out : 0x7c00 } , // in f32=65536.000000, out f16=+Inf
{ in : math . Float32frombits ( 0x7f7fffff ) , out : 0x7c00 } , // in f32=340282346638528859811704183484516925440.000000, out f16=+Inf
{ in : math . Float32frombits ( 0x7f800000 ) , out : 0x7c00 } , // in f32=+Inf, out f16=+Inf
{ in : math . Float32frombits ( 0x7f801fff ) , out : 0x7e00 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0x7f802000 ) , out : 0x7e01 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0x7f803fff ) , out : 0x7e01 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0x7f804000 ) , out : 0x7e02 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0x7fffffff ) , out : 0x7fff } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0x80000000 ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x80001fff ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x80002000 ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x80003fff ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x80004000 ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x807fffff ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0x80800000 ) , out : 0x8000 } , // in f32=-0.000000, out f16=-0
{ in : math . Float32frombits ( 0xb87fc000 ) , out : 0x83ff } , // in f32=-0.000061, out f16=-0.00006097555 // exp32=-15 (underflows binary16 exp) but round-trips
{ in : math . Float32frombits ( 0xb87fffff ) , out : 0x8400 } , // in f32=-0.000061, out f16=-0.000061035156
{ in : math . Float32frombits ( 0xb8800000 ) , out : 0x8400 } , // in f32=-0.000061, out f16=-0.000061035156
{ in : math . Float32frombits ( 0xb8801fff ) , out : 0x8401 } , // in f32=-0.000061, out f16=-0.00006109476
{ in : math . Float32frombits ( 0xb8802000 ) , out : 0x8401 } , // in f32=-0.000061, out f16=-0.00006109476
{ in : math . Float32frombits ( 0xb8803fff ) , out : 0x8402 } , // in f32=-0.000061, out f16=-0.000061154366
{ in : math . Float32frombits ( 0xb8804000 ) , out : 0x8402 } , // in f32=-0.000061, out f16=-0.000061154366
{ in : math . Float32frombits ( 0xc77fffff ) , out : 0xfc00 } , // in f32=-65535.996094, out f16=-Inf
{ in : math . Float32frombits ( 0xc7800000 ) , out : 0xfc00 } , // in f32=-65536.000000, out f16=-Inf
{ in : math . Float32frombits ( 0xff7fffff ) , out : 0xfc00 } , // in f32=-340282346638528859811704183484516925440.000000, out f16=-Inf
{ in : math . Float32frombits ( 0xff800000 ) , out : 0xfc00 } , // in f32=-Inf, out f16=-Inf
{ in : math . Float32frombits ( 0xff801fff ) , out : 0xfe00 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0xff802000 ) , out : 0xfe01 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0xff803fff ) , out : 0xfe01 } , // in f32=NaN, out f16=NaN
{ in : math . Float32frombits ( 0xff804000 ) , out : 0xfe02 } , // in f32=NaN, out f16=NaN
// additional tests
{ in : math . Float32frombits ( 0xc77ff000 ) , out : 0xfc00 } , // in f32=-65520.000000, out f16=-Inf
{ in : math . Float32frombits ( 0xc77fef00 ) , out : 0xfbff } , // in f32=-65519.000000, out f16=-65504
{ in : math . Float32frombits ( 0xc77fee00 ) , out : 0xfbff } , // in f32=-65518.000000, out f16=-65504
{ in : math . Float32frombits ( 0xc5802000 ) , out : 0xec01 } , // in f32=-4100.000000, out f16=-4100
{ in : math . Float32frombits ( 0xc5801800 ) , out : 0xec01 } , // in f32=-4099.000000, out f16=-4100
{ in : math . Float32frombits ( 0xc5801000 ) , out : 0xec00 } , // in f32=-4098.000000, out f16=-4096
{ in : math . Float32frombits ( 0xc5800800 ) , out : 0xec00 } , // in f32=-4097.000000, out f16=-4096
{ in : math . Float32frombits ( 0xc5800000 ) , out : 0xec00 } , // in f32=-4096.000000, out f16=-4096
{ in : math . Float32frombits ( 0xc57ff000 ) , out : 0xec00 } , // in f32=-4095.000000, out f16=-4096
{ in : math . Float32frombits ( 0xc57fe000 ) , out : 0xebff } , // in f32=-4094.000000, out f16=-4094
{ in : math . Float32frombits ( 0xc57fd000 ) , out : 0xebfe } , // in f32=-4093.000000, out f16=-4092
{ in : math . Float32frombits ( 0xc5002000 ) , out : 0xe801 } , // in f32=-2050.000000, out f16=-2050
{ in : math . Float32frombits ( 0xc5001000 ) , out : 0xe800 } , // in f32=-2049.000000, out f16=-2048
{ in : math . Float32frombits ( 0xc5000829 ) , out : 0xe800 } , // in f32=-2048.510010, out f16=-2048
{ in : math . Float32frombits ( 0xc5000800 ) , out : 0xe800 } , // in f32=-2048.500000, out f16=-2048
{ in : math . Float32frombits ( 0xc50007d7 ) , out : 0xe800 } , // in f32=-2048.489990, out f16=-2048
{ in : math . Float32frombits ( 0xc5000000 ) , out : 0xe800 } , // in f32=-2048.000000, out f16=-2048
{ in : math . Float32frombits ( 0xc4fff052 ) , out : 0xe800 } , // in f32=-2047.510010, out f16=-2048
{ in : math . Float32frombits ( 0xc4fff000 ) , out : 0xe800 } , // in f32=-2047.500000, out f16=-2048
{ in : math . Float32frombits ( 0xc4ffefae ) , out : 0xe7ff } , // in f32=-2047.489990, out f16=-2047
{ in : math . Float32frombits ( 0xc4ffe000 ) , out : 0xe7ff } , // in f32=-2047.000000, out f16=-2047
{ in : math . Float32frombits ( 0xc4ffc000 ) , out : 0xe7fe } , // in f32=-2046.000000, out f16=-2046
{ in : math . Float32frombits ( 0xc4ffa000 ) , out : 0xe7fd } , // in f32=-2045.000000, out f16=-2045
{ in : math . Float32frombits ( 0xbf800000 ) , out : 0xbc00 } , // in f32=-1.000000, out f16=-1
{ in : math . Float32frombits ( 0xbf028f5c ) , out : 0xb814 } , // in f32=-0.510000, out f16=-0.5097656
{ in : math . Float32frombits ( 0xbf000000 ) , out : 0xb800 } , // in f32=-0.500000, out f16=-0.5
{ in : math . Float32frombits ( 0xbefae148 ) , out : 0xb7d7 } , // in f32=-0.490000, out f16=-0.48999023
{ in : math . Float32frombits ( 0x3efae148 ) , out : 0x37d7 } , // in f32=0.490000, out f16=0.48999023
{ in : math . Float32frombits ( 0x3f000000 ) , out : 0x3800 } , // in f32=0.500000, out f16=0.5
{ in : math . Float32frombits ( 0x3f028f5c ) , out : 0x3814 } , // in f32=0.510000, out f16=0.5097656
{ in : math . Float32frombits ( 0x3f800000 ) , out : 0x3c00 } , // in f32=1.000000, out f16=1
{ in : math . Float32frombits ( 0x3fbeb852 ) , out : 0x3df6 } , // in f32=1.490000, out f16=1.4902344
{ in : math . Float32frombits ( 0x3fc00000 ) , out : 0x3e00 } , // in f32=1.500000, out f16=1.5
{ in : math . Float32frombits ( 0x3fc147ae ) , out : 0x3e0a } , // in f32=1.510000, out f16=1.5097656
{ in : math . Float32frombits ( 0x3fcf1bbd ) , out : 0x3e79 } , // in f32=1.618034, out f16=1.6181641
{ in : math . Float32frombits ( 0x401f5c29 ) , out : 0x40fb } , // in f32=2.490000, out f16=2.4902344
{ in : math . Float32frombits ( 0x40200000 ) , out : 0x4100 } , // in f32=2.500000, out f16=2.5
{ in : math . Float32frombits ( 0x4020a3d7 ) , out : 0x4105 } , // in f32=2.510000, out f16=2.5097656
{ in : math . Float32frombits ( 0x402df854 ) , out : 0x4170 } , // in f32=2.718282, out f16=2.71875
{ in : math . Float32frombits ( 0x40490fdb ) , out : 0x4248 } , // in f32=3.141593, out f16=3.140625
{ in : math . Float32frombits ( 0x40b00000 ) , out : 0x4580 } , // in f32=5.500000, out f16=5.5
{ in : math . Float32frombits ( 0x44ffa000 ) , out : 0x67fd } , // in f32=2045.000000, out f16=2045
{ in : math . Float32frombits ( 0x44ffc000 ) , out : 0x67fe } , // in f32=2046.000000, out f16=2046
{ in : math . Float32frombits ( 0x44ffe000 ) , out : 0x67ff } , // in f32=2047.000000, out f16=2047
{ in : math . Float32frombits ( 0x44ffefae ) , out : 0x67ff } , // in f32=2047.489990, out f16=2047
{ in : math . Float32frombits ( 0x44fff000 ) , out : 0x6800 } , // in f32=2047.500000, out f16=2048
{ in : math . Float32frombits ( 0x44fff052 ) , out : 0x6800 } , // in f32=2047.510010, out f16=2048
{ in : math . Float32frombits ( 0x45000000 ) , out : 0x6800 } , // in f32=2048.000000, out f16=2048
{ in : math . Float32frombits ( 0x450007d7 ) , out : 0x6800 } , // in f32=2048.489990, out f16=2048
{ in : math . Float32frombits ( 0x45000800 ) , out : 0x6800 } , // in f32=2048.500000, out f16=2048
{ in : math . Float32frombits ( 0x45000829 ) , out : 0x6800 } , // in f32=2048.510010, out f16=2048
{ in : math . Float32frombits ( 0x45001000 ) , out : 0x6800 } , // in f32=2049.000000, out f16=2048
{ in : math . Float32frombits ( 0x450017d7 ) , out : 0x6801 } , // in f32=2049.489990, out f16=2050
{ in : math . Float32frombits ( 0x45001800 ) , out : 0x6801 } , // in f32=2049.500000, out f16=2050
{ in : math . Float32frombits ( 0x45001829 ) , out : 0x6801 } , // in f32=2049.510010, out f16=2050
{ in : math . Float32frombits ( 0x45002000 ) , out : 0x6801 } , // in f32=2050.000000, out f16=2050
{ in : math . Float32frombits ( 0x45003000 ) , out : 0x6802 } , // in f32=2051.000000, out f16=2052
{ in : math . Float32frombits ( 0x457fd000 ) , out : 0x6bfe } , // in f32=4093.000000, out f16=4092
{ in : math . Float32frombits ( 0x457fe000 ) , out : 0x6bff } , // in f32=4094.000000, out f16=4094
{ in : math . Float32frombits ( 0x457ff000 ) , out : 0x6c00 } , // in f32=4095.000000, out f16=4096
{ in : math . Float32frombits ( 0x45800000 ) , out : 0x6c00 } , // in f32=4096.000000, out f16=4096
{ in : math . Float32frombits ( 0x45800800 ) , out : 0x6c00 } , // in f32=4097.000000, out f16=4096
{ in : math . Float32frombits ( 0x45801000 ) , out : 0x6c00 } , // in f32=4098.000000, out f16=4096
{ in : math . Float32frombits ( 0x45801800 ) , out : 0x6c01 } , // in f32=4099.000000, out f16=4100
{ in : math . Float32frombits ( 0x45802000 ) , out : 0x6c01 } , // in f32=4100.000000, out f16=4100
{ in : math . Float32frombits ( 0x45ad9c00 ) , out : 0x6d6d } , // in f32=5555.500000, out f16=5556
{ in : math . Float32frombits ( 0x45ffe800 ) , out : 0x6fff } , // in f32=8189.000000, out f16=8188
{ in : math . Float32frombits ( 0x45fff000 ) , out : 0x7000 } , // in f32=8190.000000, out f16=8192
{ in : math . Float32frombits ( 0x45fff800 ) , out : 0x7000 } , // in f32=8191.000000, out f16=8192
{ in : math . Float32frombits ( 0x46000000 ) , out : 0x7000 } , // in f32=8192.000000, out f16=8192
{ in : math . Float32frombits ( 0x46000400 ) , out : 0x7000 } , // in f32=8193.000000, out f16=8192
{ in : math . Float32frombits ( 0x46000800 ) , out : 0x7000 } , // in f32=8194.000000, out f16=8192
{ in : math . Float32frombits ( 0x46000c00 ) , out : 0x7000 } , // in f32=8195.000000, out f16=8192
{ in : math . Float32frombits ( 0x46001000 ) , out : 0x7000 } , // in f32=8196.000000, out f16=8192
{ in : math . Float32frombits ( 0x46001400 ) , out : 0x7001 } , // in f32=8197.000000, out f16=8200
{ in : math . Float32frombits ( 0x46001800 ) , out : 0x7001 } , // in f32=8198.000000, out f16=8200
{ in : math . Float32frombits ( 0x46001c00 ) , out : 0x7001 } , // in f32=8199.000000, out f16=8200
{ in : math . Float32frombits ( 0x46002000 ) , out : 0x7001 } , // in f32=8200.000000, out f16=8200
{ in : math . Float32frombits ( 0x46002400 ) , out : 0x7001 } , // in f32=8201.000000, out f16=8200
{ in : math . Float32frombits ( 0x46002800 ) , out : 0x7001 } , // in f32=8202.000000, out f16=8200
{ in : math . Float32frombits ( 0x46002c00 ) , out : 0x7001 } , // in f32=8203.000000, out f16=8200
{ in : math . Float32frombits ( 0x46003000 ) , out : 0x7002 } , // in f32=8204.000000, out f16=8208
{ in : math . Float32frombits ( 0x467fec00 ) , out : 0x73ff } , // in f32=16379.000000, out f16=16376
{ in : math . Float32frombits ( 0x467ff000 ) , out : 0x7400 } , // in f32=16380.000000, out f16=16384
{ in : math . Float32frombits ( 0x467ff400 ) , out : 0x7400 } , // in f32=16381.000000, out f16=16384
{ in : math . Float32frombits ( 0x467ff800 ) , out : 0x7400 } , // in f32=16382.000000, out f16=16384
{ in : math . Float32frombits ( 0x467ffc00 ) , out : 0x7400 } , // in f32=16383.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800000 ) , out : 0x7400 } , // in f32=16384.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800200 ) , out : 0x7400 } , // in f32=16385.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800400 ) , out : 0x7400 } , // in f32=16386.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800600 ) , out : 0x7400 } , // in f32=16387.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800800 ) , out : 0x7400 } , // in f32=16388.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800a00 ) , out : 0x7400 } , // in f32=16389.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800c00 ) , out : 0x7400 } , // in f32=16390.000000, out f16=16384
{ in : math . Float32frombits ( 0x46800e00 ) , out : 0x7400 } , // in f32=16391.000000, out f16=16384
{ in : math . Float32frombits ( 0x46801000 ) , out : 0x7400 } , // in f32=16392.000000, out f16=16384
{ in : math . Float32frombits ( 0x46801200 ) , out : 0x7401 } , // in f32=16393.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801400 ) , out : 0x7401 } , // in f32=16394.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801600 ) , out : 0x7401 } , // in f32=16395.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801800 ) , out : 0x7401 } , // in f32=16396.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801a00 ) , out : 0x7401 } , // in f32=16397.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801c00 ) , out : 0x7401 } , // in f32=16398.000000, out f16=16400
{ in : math . Float32frombits ( 0x46801e00 ) , out : 0x7401 } , // in f32=16399.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802000 ) , out : 0x7401 } , // in f32=16400.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802200 ) , out : 0x7401 } , // in f32=16401.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802400 ) , out : 0x7401 } , // in f32=16402.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802600 ) , out : 0x7401 } , // in f32=16403.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802800 ) , out : 0x7401 } , // in f32=16404.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802a00 ) , out : 0x7401 } , // in f32=16405.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802c00 ) , out : 0x7401 } , // in f32=16406.000000, out f16=16400
{ in : math . Float32frombits ( 0x46802e00 ) , out : 0x7401 } , // in f32=16407.000000, out f16=16400
{ in : math . Float32frombits ( 0x46803000 ) , out : 0x7402 } , // in f32=16408.000000, out f16=16416
{ in : math . Float32frombits ( 0x46ffee00 ) , out : 0x77ff } , // in f32=32759.000000, out f16=32752
{ in : math . Float32frombits ( 0x46fff000 ) , out : 0x7800 } , // in f32=32760.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fff200 ) , out : 0x7800 } , // in f32=32761.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fff400 ) , out : 0x7800 } , // in f32=32762.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fff600 ) , out : 0x7800 } , // in f32=32763.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fff800 ) , out : 0x7800 } , // in f32=32764.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fffa00 ) , out : 0x7800 } , // in f32=32765.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fffc00 ) , out : 0x7800 } , // in f32=32766.000000, out f16=32768
{ in : math . Float32frombits ( 0x46fffe00 ) , out : 0x7800 } , // in f32=32767.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000000 ) , out : 0x7800 } , // in f32=32768.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000100 ) , out : 0x7800 } , // in f32=32769.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000200 ) , out : 0x7800 } , // in f32=32770.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000300 ) , out : 0x7800 } , // in f32=32771.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000400 ) , out : 0x7800 } , // in f32=32772.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000500 ) , out : 0x7800 } , // in f32=32773.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000600 ) , out : 0x7800 } , // in f32=32774.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000700 ) , out : 0x7800 } , // in f32=32775.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000800 ) , out : 0x7800 } , // in f32=32776.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000900 ) , out : 0x7800 } , // in f32=32777.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000a00 ) , out : 0x7800 } , // in f32=32778.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000b00 ) , out : 0x7800 } , // in f32=32779.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000c00 ) , out : 0x7800 } , // in f32=32780.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000d00 ) , out : 0x7800 } , // in f32=32781.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000e00 ) , out : 0x7800 } , // in f32=32782.000000, out f16=32768
{ in : math . Float32frombits ( 0x47000f00 ) , out : 0x7800 } , // in f32=32783.000000, out f16=32768
{ in : math . Float32frombits ( 0x47001000 ) , out : 0x7800 } , // in f32=32784.000000, out f16=32768
{ in : math . Float32frombits ( 0x47001100 ) , out : 0x7801 } , // in f32=32785.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001200 ) , out : 0x7801 } , // in f32=32786.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001300 ) , out : 0x7801 } , // in f32=32787.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001400 ) , out : 0x7801 } , // in f32=32788.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001500 ) , out : 0x7801 } , // in f32=32789.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001600 ) , out : 0x7801 } , // in f32=32790.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001700 ) , out : 0x7801 } , // in f32=32791.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001800 ) , out : 0x7801 } , // in f32=32792.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001900 ) , out : 0x7801 } , // in f32=32793.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001a00 ) , out : 0x7801 } , // in f32=32794.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001b00 ) , out : 0x7801 } , // in f32=32795.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001c00 ) , out : 0x7801 } , // in f32=32796.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001d00 ) , out : 0x7801 } , // in f32=32797.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001e00 ) , out : 0x7801 } , // in f32=32798.000000, out f16=32800
{ in : math . Float32frombits ( 0x47001f00 ) , out : 0x7801 } , // in f32=32799.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002000 ) , out : 0x7801 } , // in f32=32800.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002100 ) , out : 0x7801 } , // in f32=32801.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002200 ) , out : 0x7801 } , // in f32=32802.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002300 ) , out : 0x7801 } , // in f32=32803.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002400 ) , out : 0x7801 } , // in f32=32804.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002500 ) , out : 0x7801 } , // in f32=32805.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002600 ) , out : 0x7801 } , // in f32=32806.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002700 ) , out : 0x7801 } , // in f32=32807.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002800 ) , out : 0x7801 } , // in f32=32808.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002900 ) , out : 0x7801 } , // in f32=32809.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002a00 ) , out : 0x7801 } , // in f32=32810.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002b00 ) , out : 0x7801 } , // in f32=32811.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002c00 ) , out : 0x7801 } , // in f32=32812.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002d00 ) , out : 0x7801 } , // in f32=32813.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002e00 ) , out : 0x7801 } , // in f32=32814.000000, out f16=32800
{ in : math . Float32frombits ( 0x47002f00 ) , out : 0x7801 } , // in f32=32815.000000, out f16=32800
{ in : math . Float32frombits ( 0x47003000 ) , out : 0x7802 } , // in f32=32816.000000, out f16=32832
{ in : math . Float32frombits ( 0x477fe500 ) , out : 0x7bff } , // in f32=65509.000000, out f16=65504
{ in : math . Float32frombits ( 0x477fe100 ) , out : 0x7bff } , // in f32=65505.000000, out f16=65504
{ in : math . Float32frombits ( 0x477fee00 ) , out : 0x7bff } , // in f32=65518.000000, out f16=65504
{ in : math . Float32frombits ( 0x477fef00 ) , out : 0x7bff } , // in f32=65519.000000, out f16=65504
{ in : math . Float32frombits ( 0x477feffd ) , out : 0x7bff } , // in f32=65519.988281, out f16=65504
{ in : math . Float32frombits ( 0x477ff000 ) , out : 0x7c00 } , // in f32=65520.000000, out f16=+Inf
}
// TestPrecisionFromfloat32 first confirms that Fromfloat32 reproduces every
// expected bit pattern in wantF32toF16bits, handing each converted value to
// checkPrecision, and then probes PrecisionFromfloat32 with hand-picked
// float32 inputs covering the exact, unknown, inexact, underflow and
// overflow results.
func TestPrecisionFromfloat32(t *testing.T) {
	for idx, tc := range wantF32toF16bits {
		converted := float16.Fromfloat32(tc.in)
		if got := uint16(converted); got != tc.out {
			t.Errorf("i=%d, in f32bits=0x%08x, wanted=0x%04x, got=0x%04x.", idx, math.Float32bits(tc.in), tc.out, got)
		}
		checkPrecision(t, tc.in, converted, uint64(idx))
	}

	// No significand bits are dropped and the exponent is in the normal range.
	exact := float32(5.5)
	if got := float16.PrecisionFromfloat32(exact); got != float16.PrecisionExact {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionExact (%d), got=%d.", math.Float32bits(exact), float16.PrecisionExact, got)
	}

	// Subnormal result with coef = 0 that can round-trip float32->float16->float32.
	subZeroCoef := math.Float32frombits(0x38000000)
	if got := float16.PrecisionFromfloat32(subZeroCoef); got != float16.PrecisionUnknown {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionUnknown (%d), got=%d.", math.Float32bits(subZeroCoef), float16.PrecisionUnknown, got)
	}

	// Subnormal result with coef != 0 that can round-trip float32->float16->float32.
	subNonZeroCoef := math.Float32frombits(0x387fc000)
	if got := float16.PrecisionFromfloat32(subNonZeroCoef); got != float16.PrecisionUnknown {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionUnknown (%d), got=%d.", math.Float32bits(subNonZeroCoef), float16.PrecisionUnknown, got)
	}

	// Subnormal result with no dropped bits that cannot round-trip float32->float16->float32.
	subNoRoundTrip := math.Float32frombits(0x33c00000)
	if got := float16.PrecisionFromfloat32(subNoRoundTrip); got != float16.PrecisionUnknown {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionUnknown (%d), got=%d.", math.Float32bits(subNoRoundTrip), float16.PrecisionUnknown, got)
	}

	// Subnormal result where the dropped bits are non-zero.
	subDropped := math.Float32frombits(0x38000001)
	if got := float16.PrecisionFromfloat32(subDropped); got != float16.PrecisionInexact {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionInexact (%d), got=%d.", math.Float32bits(subDropped), float16.PrecisionInexact, got)
	}

	// Cannot "preserve value" because significand bits are dropped.
	pi := float32(math.Pi)
	if got := float16.PrecisionFromfloat32(pi); got != float16.PrecisionInexact {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionInexact (%d), got=%d.", math.Float32bits(pi), float16.PrecisionInexact, got)
	}

	// Two values that will underflow.
	tiny := math.Float32frombits(0x1)
	if got := float16.PrecisionFromfloat32(tiny); got != float16.PrecisionUnderflow {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionUnderflow (%d), got=%d.", math.Float32bits(tiny), float16.PrecisionUnderflow, got)
	}
	small := math.Float32frombits(0x33000000)
	if got := float16.PrecisionFromfloat32(small); got != float16.PrecisionUnderflow {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionUnderflow (%d), got=%d.", math.Float32bits(small), float16.PrecisionUnderflow, got)
	}

	// Value that will overflow.
	huge := math.Float32frombits(0x47800000)
	if got := float16.PrecisionFromfloat32(huge); got != float16.PrecisionOverflow {
		t.Errorf("f32bits=0x%08x, wanted=PrecisionOverflow (%d), got=%d.", math.Float32bits(huge), float16.PrecisionOverflow, got)
	}
}
// TestFromNaN32ps checks Fromfloat32 against the wantF32toF16bits table,
// passes every table entry to checkFromNaN32ps, and then verifies that
// FromNaN32ps reports ErrInvalidNaNValue (with the expected message and
// returned value 0x7c01) when given a non-NaN input.
func TestFromNaN32ps(t *testing.T) {
	for i, v := range wantF32toF16bits {
		f16 := float16.Fromfloat32(v.in)
		u16 := uint16(f16)
		if u16 != v.out {
			t.Errorf("i=%d, in f32bits=0x%08x, wanted=0x%04x, got=0x%04x.", i, math.Float32bits(v.in), v.out, u16)
		}
		checkFromNaN32ps(t, v.in, f16)
	}

	// Original note: since checkFromNaN32ps rejects non-NaN input, try one here.
	nan, err := float16.FromNaN32ps(float32(math.Pi))
	if err == nil {
		// Report the missing error without touching err.Error(): calling a
		// method on a nil error interface would panic and abort the test run.
		t.Error("FromNaN32ps: in float32(math.Pi) wanted err float16.ErrInvalidNaNValue, got err = nil")
	} else {
		if err != float16.ErrInvalidNaNValue {
			t.Errorf("FromNaN32ps: in float32(math.Pi) wanted err float16.ErrInvalidNaNValue, got err = %q", err)
		}
		if err.Error() != "float16: invalid NaN value, expected IEEE 754 NaN" {
			t.Errorf("unexpected string value returned by err.Error() for ErrInvalidNaNValue: %s", err.Error())
		}
	}
	if uint16(nan) != 0x7c01 { // signaling NaN
		t.Errorf("FromNaN32ps: in float32(math.Pi) wanted nan = 0x7c01, got nan = 0x%04x", uint16(nan))
	}
}
// TestSomeFromFloat32 converts each input listed in wantF32toF16bits with
// Fromfloat32 and compares the resulting bits with the expected value.
// It covers only that small table of float32 values; TestAllFromFloat32
// walks every float32 input and takes correspondingly longer.
func TestSomeFromFloat32(t *testing.T) {
	for idx := range wantF32toF16bits {
		tc := wantF32toF16bits[idx]
		got := uint16(float16.Fromfloat32(tc.in))
		if got == tc.out {
			continue
		}
		t.Errorf("i=%d, in f32bits=0x%08x, wanted=0x%04x, got=0x%04x.", idx, math.Float32bits(tc.in), tc.out, got)
	}
}
// TestAllFromFloat32 tests all possible 4294967296 float32 input values and
// results for Fromfloat32(), FromNaN32ps(), and PrecisionFromfloat32().
//
// Inputs are processed in batches of batchSize. Each batch of uint16 results
// is serialized little-endian and fed into a SHA-512 hash, and the final
// digest is compared with wantSHA512. The test is skipped in short mode.
func TestAllFromFloat32(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping TestAllFromFloat32 in short mode.")
	}

	fmt.Printf("WARNING: TestAllFromFloat32 should take about 1-2 minutes to run on amd64, other platforms may take longer...\n")

	// Blake2b is "3f310bc5608a087462d361644fe66feeb4c68145f6f18eb6f1439cd7914888b6df9e30ae5350dce0635162cc6a2f23b31b3e4353ca132a3c552bdbd58baa54e6"
	const wantSHA512 = "08670429a475164d6c4a080969e35231c77ef7069b430b5f38af22e013796b7818bbe8f5942a6ddf26de0e1dfc67d02243f483d85729ebc3762fc2948a5ca1f8"

	const batchSize uint32 = 16384
	results := make([]uint16, batchSize)
	buf := new(bytes.Buffer)
	h := sha512.New()

	// i is the first bit pattern of each batch. 2^32 is a multiple of
	// batchSize, so the last batch starts at 0xFFFFC000 and ends exactly at
	// 0xFFFFFFFF; uint32(i)+j therefore never wraps.
	for i := uint64(0); i < uint64(0xFFFFFFFF); i += uint64(batchSize) {
		// fill results
		for j := uint32(0); j < batchSize; j++ {
			inF32 := math.Float32frombits(uint32(i) + j)
			f16 := float16.Fromfloat32(inF32)
			results[j] = uint16(f16)
			// checkPrecision is given the batch start i, not i+j.
			checkPrecision(t, inF32, f16, i)
			checkFromNaN32ps(t, inF32, f16)
		}

		// convert results to []byte; fail the test instead of panicking so
		// the failure is reported through testing.T
		if err := binary.Write(buf, binary.LittleEndian, results); err != nil {
			t.Fatalf("binary.Write: %v", err)
		}

		// update hash with []byte of results
		if _, err := h.Write(buf.Bytes()); err != nil {
			t.Fatalf("hash Write: %v", err)
		}

		buf.Reset()
	}

	// display hash digest in hex
	digest := h.Sum(nil)
	gotSHA512hex := hex.EncodeToString(digest)
	if gotSHA512hex != wantSHA512 {
		t.Errorf("gotSHA512hex = %s", gotSHA512hex)
	}
}
// TestAllToFloat32 tests all 65536 conversions from float16 to float32.
//
// The bit patterns are processed in batches of batchSize. Each batch of
// float32 results is serialized little-endian and fed into a SHA-512 hash,
// and the final digest is compared with wantSHA512.
func TestAllToFloat32(t *testing.T) {
	// Blake2b is "078d8e3fac9480de1493f22c8f9bfc1eb2051537c536f00f621557d70eed1af057a487c3e252f6d593769f5288d5ab66d8e9cd1adba359838802944bdb731f4d"
	const wantSHA512 = "1a4ccec9fd7b6e83310c6b4958a25778cd95f8d4f88b19950e4b8d6932a955f7fbd96b1c9bd9b2a79c3a9d34d653f55e671f8f86e6a5a876660cd38479001aa6"

	const batchSize uint32 = 16384
	results := make([]float32, batchSize)
	buf := new(bytes.Buffer)
	h := sha512.New()

	// i is the first bit pattern of each batch. 65536 is a multiple of
	// batchSize, so the last batch starts at 0xC000 and ends exactly at
	// 0xFFFF; uint16(i)+uint16(j) therefore never wraps.
	for i := uint64(0); i < uint64(0xFFFF); i += uint64(batchSize) {
		// fill results
		for j := uint32(0); j < batchSize; j++ {
			inU16 := uint16(i) + uint16(j)
			f16 := float16.Float16(inU16)
			results[j] = f16.Float32()
		}

		// convert results to []byte; fail the test instead of panicking so
		// the failure is reported through testing.T
		if err := binary.Write(buf, binary.LittleEndian, results); err != nil {
			t.Fatalf("binary.Write: %v", err)
		}

		// update hash with []byte of results
		if _, err := h.Write(buf.Bytes()); err != nil {
			t.Fatalf("hash Write: %v", err)
		}

		buf.Reset()
	}

	// display hash digest in hex
	digest := h.Sum(nil)
	gotSHA512hex := hex.EncodeToString(digest)
	if gotSHA512hex != wantSHA512 {
		t.Errorf("Float16toFloat32: gotSHA512hex = %s", gotSHA512hex)
	}
}
// TestFrombits verifies that Frombits returns a value whose underlying
// uint16 equals both the input bits and the value reported by Bits().
func TestFrombits(t *testing.T) {
	x := uint16(0x1234)
	f16 := float16.Frombits(x)
	if uint16(f16) != f16.Bits() || uint16(f16) != x {
		// Report the input actually used rather than a hard-coded literal.
		t.Errorf("float16.Frombits(0x%04x) returned %04x, wanted %04x", x, uint16(f16), x)
	}
}
// TestNaN verifies that the value returned by NaN() reports true from IsNaN().
func TestNaN(t *testing.T) {
	if v := float16.NaN(); !v.IsNaN() {
		t.Errorf("nan.IsNaN() returned false, wanted true")
	}
}
// TestInf verifies the bit patterns returned by Inf: 0x7c00 for sign
// arguments 0 and 1, and 0xfc00 for sign argument -1.
func TestInf(t *testing.T) {
	if got := uint16(float16.Inf(0)); got != 0x7c00 {
		t.Errorf("float16.Inf(0) returned %04x, wanted %04x", got, 0x7c00)
	}

	if got := uint16(float16.Inf(1)); got != 0x7c00 {
		t.Errorf("float16.Inf(1) returned %04x, wanted %04x", got, 0x7c00)
	}

	if got := uint16(float16.Inf(-1)); got != 0xfc00 {
		t.Errorf("float16.Inf(-1) returned %04x, wanted %04x", got, 0xfc00)
	}
}
// TestBits checks that Bits returns the same pattern that was handed to
// Frombits, and that it agrees with a direct uint16 conversion of the value.
func TestBits(t *testing.T) {
	const want = uint16(0x1234)

	v := float16.Frombits(want)
	raw, got := uint16(v), v.Bits()
	if raw != got || got != want {
		t.Errorf("Bits() returned %04x, wanted %04x", raw, want)
	}
}
// TestIsFinite checks that IsFinite reports true for an ordinary value (1.5)
// and false for +Inf, -Inf and NaN.
func TestIsFinite(t *testing.T) {
	// IsFinite returns true if f is neither infinite nor NaN.
	// The failure messages below name IsFinite, the method actually called
	// (they previously referred to a non-existent "Infinite()").
	finite := float16.Fromfloat32(float32(1.5))
	if !finite.IsFinite() {
		t.Errorf("finite.IsFinite() returned false, wanted true")
	}

	posInf := float16.Inf(0)
	if posInf.IsFinite() {
		t.Errorf("posInf.IsFinite() returned true, wanted false")
	}

	negInf := float16.Inf(-1)
	if negInf.IsFinite() {
		t.Errorf("negInf.IsFinite() returned true, wanted false")
	}

	nan := float16.NaN()
	if nan.IsFinite() {
		t.Errorf("nan.IsFinite() returned true, wanted false")
	}
}
// TestIsNaN checks IsNaN against two raw bit patterns: 0 must not be a NaN and
// 0x7e00 must be one.
func TestIsNaN(t *testing.T) {
	if zero := float16.Float16(0); zero.IsNaN() {
		t.Errorf("Float16(0).IsNaN() returned true, wanted false")
	}
	if nan := float16.Float16(0x7e00); !nan.IsNaN() {
		t.Errorf("Float16(0x7e00).IsNaN() returned false, wanted true")
	}
}
// TestIsQuietNaN checks IsQuietNaN against three raw bit patterns: 0 (false),
// 0x7e00 (true), and 0x7e00 with bit 0x0200 cleared (false).
func TestIsQuietNaN(t *testing.T) {
	if zero := float16.Float16(0); zero.IsQuietNaN() {
		t.Errorf("Float16(0).IsQuietNaN() returned true, wanted false")
	}

	if quiet := float16.Float16(0x7e00); !quiet.IsQuietNaN() {
		t.Errorf("Float16(0x7e00).IsQuietNaN() returned false, wanted true")
	}

	// 0x7e00 ^ 0x0200 == 0x7c00: the same pattern with bit 0x0200 removed.
	if cleared := float16.Float16(0x7e00 ^ 0x0200); cleared.IsQuietNaN() {
		t.Errorf("Float16(0x7e00 ^ 0x0200).IsQuietNaN() returned true, wanted false")
	}
}
// TestIsNormal checks that IsNormal rejects zero, both infinities, NaN and a
// subnormal (bit pattern 0x0001), and accepts 1.5.
func TestIsNormal(t *testing.T) {
	// IsNormal returns true if f is neither zero, infinite, subnormal, or NaN.
	if v := float16.Frombits(0); v.IsNormal() {
		t.Errorf("zero.IsNormal() returned true, wanted false")
	}
	if v := float16.Inf(0); v.IsNormal() {
		t.Errorf("posInf.IsNormal() returned true, wanted false")
	}
	if v := float16.Inf(-1); v.IsNormal() {
		t.Errorf("negInf.IsNormal() returned true, wanted false")
	}
	if v := float16.NaN(); v.IsNormal() {
		t.Errorf("nan.IsNormal() returned true, wanted false")
	}
	if v := float16.Frombits(0x0001); v.IsNormal() {
		t.Errorf("subnormal.IsNormal() returned true, wanted false")
	}

	// The only positive case.
	if v := float16.Fromfloat32(float32(1.5)); !v.IsNormal() {
		t.Errorf("normal.IsNormal() returned false, wanted true")
	}
}
// TestSignbit checks that Signbit is false for values converted from 0.0 and
// 2.0 and true for the value converted from -2.0.
func TestSignbit(t *testing.T) {
	if v := float16.Fromfloat32(float32(0.0)); v.Signbit() {
		t.Errorf("float16.Fromfloat32(float32(0)).Signbit() returned true, wanted false")
	}
	if v := float16.Fromfloat32(float32(2.0)); v.Signbit() {
		t.Errorf("float16.Fromfloat32(float32(2)).Signbit() returned true, wanted false")
	}
	if v := float16.Fromfloat32(float32(-2.0)); !v.Signbit() {
		t.Errorf("float16.Fromfloat32(float32(-2)).Signbit() returned false, wanted true")
	}
}
// TestString checks the text produced by String for two inputs: 1.5 must
// print as "1.5", and 3.141593 (after conversion to Float16) must print as
// "3.140625".
func TestString(t *testing.T) {
	if got := float16.Fromfloat32(1.5).String(); got != "1.5" {
		t.Errorf("Float16(1.5).String() returned %s, wanted 1.5", got)
	}
	if got := float16.Fromfloat32(3.141593).String(); got != "3.140625" {
		t.Errorf("Float16(3.141593).String() returned %s, wanted 3.140625", got)
	}
}
// TestIsInf checks IsInf for the bit patterns 0, 0x7c00 and 0xfc00 with sign
// arguments 0, 1 and -1. Expected results: 0 is never infinite; 0x7c00 matches
// sign 0 and 1 but not -1; 0xfc00 matches sign 0 and -1 but not 1.
func TestIsInf(t *testing.T) {
	var (
		zero   = float16.Float16(0)
		posInf = float16.Float16(0x7c00)
		negInf = float16.Float16(0xfc00)
	)

	if zero.IsInf(0) {
		t.Errorf("Float16(0).IsInf(0) returned true, wanted false")
	}

	// Pattern 0x7c00.
	if !posInf.IsInf(0) {
		t.Errorf("Float16(0x7c00).IsInf(0) returned false, wanted true")
	}
	if !posInf.IsInf(1) {
		t.Errorf("Float16(0x7c00).IsInf(1) returned false, wanted true")
	}
	if posInf.IsInf(-1) {
		t.Errorf("Float16(0x7c00).IsInf(-1) returned true, wanted false")
	}

	// Pattern 0xfc00.
	if !negInf.IsInf(0) {
		t.Errorf("Float16(0xfc00).IsInf(0) returned false, wanted true")
	}
	if negInf.IsInf(1) {
		t.Errorf("Float16(0xfc00).IsInf(1) returned true, wanted false")
	}
	if !negInf.IsInf(-1) {
		t.Errorf("Float16(0xfc00).IsInf(-1) returned false, wanted true")
	}
}
// float32parts breaks the bit pattern of f32 into three pieces:
//
//   - exp: the 8-bit exponent field (bits 23..30) minus the bias of 127
//   - coef: the low 23 bits of the pattern
//   - dropped: the low 13 bits of coef (coef masked with 0x7fffff >> 10);
//     presumably the coefficient bits that do not fit a 10-bit coefficient
//
// The sign bit (bit 31) is not reported.
func float32parts(f32 float32) (exp int32, coef uint32, dropped uint32) {
	const (
		coefBits = 23
		expBias  = 127
		coefMask = 1<<coefBits - 1 // 0x7fffff
		dropMask = coefMask >> 10  // 0x1fff
	)

	bits := math.Float32bits(f32)

	// The exponent field is 0..255, so the unbiased value spans -127..128.
	exp = int32(bits>>coefBits&0xff) - expBias
	coef = bits & coefMask
	dropped = coef & dropMask
	return exp, coef, dropped
}
// isNaN32 reports whether f32 is a NaN, judged from the fields returned by
// float32parts: the unbiased exponent must be 128 and the coefficient must be
// non-zero.
func isNaN32(f32 float32) bool {
	e, frac, _ := float32parts(f32)
	if e != 128 {
		return false
	}
	return frac != 0
}
// isQuietNaN32 reports whether f32 is a NaN with coefficient bit 0x00400000
// (the most significant of the 23 coefficient bits) set.
func isQuietNaN32(f32 float32) bool {
	const quietBit = 0x00400000

	e, frac, _ := float32parts(f32)
	// A set quiet bit already implies a non-zero coefficient.
	return e == 128 && frac&quietBit != 0
}
// checkFromNaN32ps compares the result of float16.FromNaN32ps(f32) with f16,
// a Float16 the caller already obtained for the same input. It does nothing
// when f32 is not a NaN.
//
// For a quiet NaN input, the error must be nil and the result must equal f16.
// For any other NaN input, the error must be nil and the result XOR f16 must
// be 0x0200 — or 0x0201 when the low nine bits of f16 are all zero.
func checkFromNaN32ps(t *testing.T, f32 float32, f16 float16.Float16) {
	if !isNaN32(f32) {
		return
	}

	bits := math.Float32bits(f32)
	got, err := float16.FromNaN32ps(f32)

	if isQuietNaN32(f32) {
		// result should be the same
		if err != nil {
			t.Errorf("FromNaN32ps: qnan = 0x%08x (%f) wanted err = nil, got err = %q", bits, f32, err)
		}
		if uint16(got) != uint16(f16) {
			t.Errorf("FromNaN32ps: qnan = 0x%08x (%f) wanted nan16 = %v, got nan16 = %v", bits, f32, f16, got)
		}
		return
	}

	// result should differ only by the signaling/quiet bit unless payload is empty
	if err != nil {
		t.Errorf("FromNaN32ps: snan = 0x%08x (%f) wanted err = nil, got err = %q", bits, f32, err)
	}

	frac := uint16(f16) & 0x03ff
	delta := uint16(got ^ f16)

	if uint16(f16)&0x01ff != 0 {
		// only the quiet bit was restored, so 1 bit differs
		if delta != 0x0200 {
			t.Errorf("FromNaN32ps: snan = 0x%08x (%f) wanted diff == 0x0200, got 0x%04x. f16=0x%04x n16=0x%04x coef=0x%04x", bits, f32, delta, uint16(f16), uint16(got), frac)
		}
		return
	}

	// the lowest bit needed to be set to prevent turning sNaN into infinity, so 2 bits differ
	if delta != 0x0201 {
		t.Errorf("FromNaN32ps: snan = 0x%08x (%f) wanted diff == 0x0201, got 0x%04x", bits, f32, delta)
	}
}
// checkPrecision cross-checks float16.PrecisionFromfloat32(f32) against what
// actually happens when f16 is converted back to float32. i is only used to
// label failure messages.
//
// When the bits survive the round trip unchanged, the work is delegated to
// checkRoundTrippedPrecision. Otherwise the reported precision is checked for
// consistency with the unbiased exponent and dropped bits of f32.
func checkPrecision(t *testing.T, f32 float32, f16 float16.Float16, i uint64) {
	// TODO: rewrite this test when time allows
	u32 := math.Float32bits(f32)
	u16 := f16.Bits()
	f32bis := f16.Float32()
	u32bis := math.Float32bits(f32bis)
	pre := float16.PrecisionFromfloat32(f32)
	exp32, coef32, dropped32 := float32parts(f32)

	if u32 == u32bis {
		checkRoundTrippedPrecision(t, u32, u16, u32bis, exp32, coef32, dropped32)
		return
	}

	// Every failure below reports the same six values; only the text differs.
	fail := func(format string) {
		t.Helper()
		t.Errorf(format, i, u32, f32, u16, u32bis, f32bis)
	}

	switch {
	case pre == float16.PrecisionExact:
		// this should only happen if both input and output are NaN
		if !f16.IsNaN() || !isNaN32(f32) {
			fail("i=%d, PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionExact when roundtrip failed with non-special value")
		}

	case pre == float16.PrecisionUnknown:
		if exp32 < -24 {
			fail("i=%d, PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionUnknown, wanted PrecisionUnderflow")
		}
		if dropped32 != 0 {
			fail("i=%d, PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionUnknown, wanted PrecisionInexact")
		}

	case pre == float16.PrecisionInexact:
		checkPrecisionInexact(t, u32, u16, u32bis, exp32, coef32, dropped32)

	case pre == float16.PrecisionUnderflow:
		if exp32 >= -14 {
			fail("i=%d, PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionUnderflow when exp32 is >= -14")
		}

	case pre == float16.PrecisionOverflow:
		if exp32 <= 15 {
			fail("i=%d, PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionOverflow when exp32 is <= 15")
		}
	}
}
// checkPrecisionInexact reports an error for each way the given float32 parts
// contradict a PrecisionInexact classification: exponent below -24, exponent
// above 15, a zero coefficient, or no dropped bits. The checks are independent,
// so several may fire for one input.
//
// u32 and u32bis are the float32 bit patterns before and after the round trip
// through u16; they are used for the failure messages.
func checkPrecisionInexact(t *testing.T, u32 uint32, u16 uint16, u32bis uint32, exp32 int32, coef32 uint32, dropped32 uint32) {
	in := math.Float32frombits(u32)
	back := math.Float32frombits(u32bis)

	// All failures share one argument list; only the format text varies.
	fail := func(format string) {
		t.Helper()
		t.Errorf(format, u32, in, u16, u32bis, back)
	}

	if exp32 < -24 {
		fail("PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionInexact, wanted PrecisionUnderflow")
	}
	if exp32 > 15 {
		fail("PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionInexact, wanted PrecisionOverflow")
	}
	if coef32 == 0 {
		fail("PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionInexact when coef32 is 0")
	}
	if dropped32 == 0 {
		fail("PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), got PrecisionInexact when dropped32 is 0")
	}
}
// checkRoundTrippedPrecision validates an input whose float32 bits (u32)
// survived conversion to u16 and back (u32bis) unchanged. Such an input must
// have no dropped coefficient bits, and PrecisionFromfloat32 must classify it
// as PrecisionExact or PrecisionUnknown.
func checkRoundTrippedPrecision(t *testing.T, u32 uint32, u16 uint16, u32bis uint32, exp32 int32, coef32 uint32, dropped32 uint32) {
	in := math.Float32frombits(u32)
	back := math.Float32frombits(u32bis)
	got := float16.PrecisionFromfloat32(in)
	half := float16.Frombits(u16)

	if dropped32 != 0 {
		t.Errorf("PrecisionFromfloat32 in f32bits=0x%08x (%f), out f16bits=0x%04x, back=0x%08x (%f), dropped32 != 0 with successful roundtrip", u32, in, u16, u32bis, back)
	}

	// PrecisionUnknown is tolerated as well as PrecisionExact:
	// there are 2046 values that are subnormal and can round-trip float32->float16->float32
	if got != float16.PrecisionExact && got != float16.PrecisionUnknown {
		t.Errorf("PrecisionFromfloat32 in f32bits=0x%08x (%032b) (%f), out f16bits=0x%04x (%v), back=0x%08x (%f), got %v, wanted PrecisionExact, exp=%d, coef=%d, drpd=%d", u32, u32, in, u16, half, u32bis, back, got, exp32, coef32, dropped32)
	}
}