using System;

namespace Passer.LinearAlgebra {

    /// <summary>
    /// IEEE 754 half-precision (binary16) floating point value.
    /// </summary>
    /// <remarks>
    /// The value is stored as its raw 16-bit pattern: 1 sign bit, 5 exponent
    /// bits (bias 15) and 10 mantissa bits. Conversions to and from
    /// <c>float</c> are done in software.
    /// </remarks>
    public class float16 {
        //
        //    FILE: float16.cpp
        //  AUTHOR: Rob Tillaart
        // VERSION: 0.1.8
        // PURPOSE: library for Float16s for Arduino
        //     URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format

        // Raw binary16 bit pattern.
        ushort _value;

        /// <summary>Creates a half-precision value holding positive zero.</summary>
        public float16() {
            _value = 0;
        }

        /// <summary>Creates a half-precision value from a single-precision value.</summary>
        /// <param name="f">The value to convert; it is rounded to the nearest representable half.</param>
        public float16(float f) {
            _value = f32tof16(f);
        }

        /// <summary>Converts the stored half-precision value to single precision.</summary>
        /// <returns>The exact single-precision equivalent of the stored bit pattern.</returns>
        public float toFloat() {
            return f16tof32(_value);
        }

        /// <summary>Returns the raw 16-bit pattern of this value.</summary>
        public ushort GetBinary() {
            return _value;
        }

        /// <summary>Replaces the raw 16-bit pattern of this value.</summary>
        /// <param name="value">The binary16 bit pattern to store.</param>
        public void SetBinary(ushort value) {
            _value = value;
        }

        // The block below is the not-yet-ported C++ code of the original
        // library, kept for reference.

        //////////////////////////////////////////////////////////
        //
        //  EQUALITIES
        //
        /*
        bool float16::operator ==(const float16 &f)
        {
            return (_value == f._value);
        }

        bool float16::operator !=(const float16 &f)
        {
            return (_value != f._value);
        }

        bool float16::operator >(const float16 &f)
        {
            if ((_value & 0x8000) && (f._value & 0x8000)) return _value < f._value;
            if (_value & 0x8000) return false;
            if (f._value & 0x8000) return true;
            return _value > f._value;
        }

        bool float16::operator >=(const float16 &f)
        {
            if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value;
            if (_value & 0x8000) return false;
            if (f._value & 0x8000) return true;
            return _value >= f._value;
        }

        bool float16::operator <(const float16 &f)
        {
            if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value;
            if (_value & 0x8000) return true;
            if (f._value & 0x8000) return false;
            return _value < f._value;
        }

        bool float16::operator <=(const float16 &f)
        {
            if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value;
            if (_value & 0x8000) return true;
            if (f._value & 0x8000) return false;
            return _value <= f._value;
        }

        //////////////////////////////////////////////////////////
        //
        //  NEGATION
        //
        float16 float16::operator -()
        {
            float16 f16;
            f16.setBinary(_value ^ 0x8000);
            return f16;
        }

        //////////////////////////////////////////////////////////
        //
        //  MATH
        //
        float16 float16::operator +(const float16 &f)
        {
            return float16(this->toDouble() + f.toDouble());
        }

        float16 float16::operator -(const float16 &f)
        {
            return float16(this->toDouble() - f.toDouble());
        }

        float16 float16::operator *(const float16 &f)
        {
            return float16(this->toDouble() * f.toDouble());
        }

        float16 float16::operator /(const float16 &f)
        {
            return float16(this->toDouble() / f.toDouble());
        }

        float16 & float16::operator+=(const float16 &f)
        {
            *this = this->toDouble() + f.toDouble();
            return *this;
        }

        float16 & float16::operator-=(const float16 &f)
        {
            *this = this->toDouble() - f.toDouble();
            return *this;
        }

        float16 & float16::operator*=(const float16 &f)
        {
            *this = this->toDouble() * f.toDouble();
            return *this;
        }

        float16 & float16::operator/=(const float16 &f)
        {
            *this = this->toDouble() / f.toDouble();
            return *this;
        }

        //////////////////////////////////////////////////////////
        //
        //  MATH HELPER FUNCTIONS
        //
        int float16::sign()
        {
            if (_value & 0x8000) return -1;
            if (_value & 0xFFFF) return 1;
            return 0;
        }

        bool float16::isZero()
        {
            return ((_value & 0x7FFF) == 0x0000);
        }

        bool float16::isNaN()
        {
            if ((_value & 0x7C00) != 0x7C00) return false;
            if ((_value & 0x03FF) == 0x0000) return false;
            return true;
        }

        bool float16::isInf()
        {
            return ((_value == 0x7C00) || (_value == 0xFC00));
        }
        */

        //////////////////////////////////////////////////////////
        //
        //  CORE CONVERSION
        //

        /// <summary>Decodes a binary16 bit pattern into a single-precision value.</summary>
        /// <param name="bits">The binary16 bit pattern.</param>
        /// <returns>
        /// The decoded value. Signed zero, infinities, NaN and subnormals are
        /// all handled; every finite half value is exactly representable as
        /// a <c>float</c>.
        /// </returns>
        static float f16tof32(ushort bits) {
            bool sgn = (bits & 0x8000) != 0;
            int exp = (bits & 0x7C00) >> 10;
            int man = bits & 0x03FF;

            // ZERO (use float literals so that the sign of -0 survives)
            if (exp == 0 && man == 0) {
                return sgn ? -0.0f : 0.0f;
            }

            // NAN & INF
            if (exp == 0x001F) {
                if (man != 0)
                    return float.NaN;
                return sgn ? float.NegativeInfinity : float.PositiveInfinity;
            }

            float f;
            if (exp == 0) {
                // SUBNORMAL: no implicit leading 1, fixed exponent of -14
                // value = (man / 1024) * 2^-14 = man * 2^-24
                f = man * (float)Math.Pow(2.0, -24);
            }
            else {
                // NORMAL: implicit leading 1
                // value = (1 + man / 1024) * 2^(exp - 15) = (1024 + man) * 2^(exp - 25)
                f = (0x0400 | man) * (float)Math.Pow(2.0, exp - 25);
            }
            return sgn ? -f : f;
        }

        /// <summary>Returns the IEEE 754 binary32 bit pattern of a single-precision value.</summary>
        /// <param name="value">The value to reinterpret.</param>
        /// <returns>
        /// The 32 bits of <paramref name="value"/>: sign in bit 31, exponent in
        /// bits 30..23 and mantissa in bits 22..0, independent of the host
        /// byte order.
        /// </returns>
        public static uint SingleToInt32Bits(float value) {
            // GetBytes and ToUInt32 both use the host byte order, so the
            // round trip yields the numeric bit pattern on any platform.
            // No byte swapping must be applied in between.
            byte[] bytes = BitConverter.GetBytes(value);
            return BitConverter.ToUInt32(bytes, 0);
        }

        /// <summary>Encodes a single-precision value as a binary16 bit pattern.</summary>
        /// <param name="f">The value to encode.</param>
        /// <returns>
        /// The binary16 bit pattern nearest to <paramref name="f"/>. The first
        /// discarded mantissa bit decides the rounding (round half up in
        /// magnitude). Values too large for half become infinity, values too
        /// small become signed zero, and any NaN becomes 0xFE00.
        /// </returns>
        static ushort f32tof16(float f) {
            uint t = SingleToInt32Bits(f);

            uint frac = t & 0x007FFFFF;                     // 23 mantissa bits
            int exp = (int)((t & 0x7F800000) >> 23);        // biased float32 exponent
            ushort sgn = (ushort)((t >> 16) & 0x8000);      // sign moved to bit 15

            // zero, and denormalized float32 which does not fit in float16
            if (exp == 0x00) {
                return sgn;
            }

            // handle infinity & NAN (test the complete mantissa for NaN)
            if (exp == 0x00FF) {
                if (frac != 0)
                    return 0xFE00;                          // NAN
                return (ushort)(sgn | 0x7C00);              // -INF : INF
            }

            // rebias the exponent from float32 (127) to float16 (15)
            exp = exp - 127 + 15;

            // overflow does not fit => INF
            if (exp > 30) {
                return (ushort)(sgn | 0x7C00);
            }

            // subnormal numbers
            if (exp <= 0) {
                // smaller than half of the smallest subnormal (2^-24) => 0
                if (exp < -10) {
                    return sgn;
                }
                // restore the implicit leading 1, then shift so that one
                // extra bit remains below the 10 mantissa bits for rounding
                uint sig = frac | 0x00800000;
                uint sub = sig >> (13 - exp);
                sub = (sub + 1) >> 1;
                // sub may round up to 0x0400, which is exactly the encoding
                // of the smallest normal number
                return (ushort)(sgn | sub);
            }

            // normal numbers: keep 11 mantissa bits, round to 10
            uint man = frac >> 12;
            man = (man + 1) >> 1;
            // add instead of or: a mantissa that rounds up to 0x0400 must
            // carry into the exponent (and becomes INF when exp was 30)
            return (ushort)(sgn | (((uint)exp << 10) + man));
        }

        // -- END OF FILE --

    }

}