2025-02-27 11:19:50 +01:00

322 lines
9.4 KiB
C#

using System;
namespace Passer.LinearAlgebra {
public class float16 {
//
// FILE: float16.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.1.8
// PURPOSE: library for Float16s for Arduino
// URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format
ushort _value;
public float16() { _value = 0; }
public float16(float f) {
//_value = f32tof16(f);
_value = F32ToF16__(f);
}
public float toFloat() {
return f16tof32(_value);
}
public ushort GetBinary() { return _value; }
public void SetBinary(ushort value) { _value = value; }
//////////////////////////////////////////////////////////
//
// EQUALITIES
//
/*
bool float16::operator ==(const float16 &f) { return (_value == f._value); }
bool float16::operator !=(const float16 &f) { return (_value != f._value); }
bool float16::operator >(const float16 &f) {
if ((_value & 0x8000) && (f._value & 0x8000))
return _value < f._value;
if (_value & 0x8000)
return false;
if (f._value & 0x8000)
return true;
return _value > f._value;
}
bool float16::operator >=(const float16 &f) {
if ((_value & 0x8000) && (f._value & 0x8000))
return _value <= f._value;
if (_value & 0x8000)
return false;
if (f._value & 0x8000)
return true;
return _value >= f._value;
}
bool float16::operator <(const float16 &f) {
if ((_value & 0x8000) && (f._value & 0x8000))
return _value > f._value;
if (_value & 0x8000)
return true;
if (f._value & 0x8000)
return false;
return _value < f._value;
}
bool float16::operator <=(const float16 &f) {
if ((_value & 0x8000) && (f._value & 0x8000))
return _value >= f._value;
if (_value & 0x8000)
return true;
if (f._value & 0x8000)
return false;
return _value <= f._value;
}
//////////////////////////////////////////////////////////
//
// NEGATION
//
float16 float16::operator -() {
float16 f16;
f16.setBinary(_value ^ 0x8000);
return f16;
}
//////////////////////////////////////////////////////////
//
// MATH
//
float16 float16::operator +(const float16 &f) {
return float16(this->toDouble() + f.toDouble());
}
float16 float16::operator -(const float16 &f) {
return float16(this->toDouble() - f.toDouble());
}
float16 float16::operator *(const float16 &f) {
return float16(this->toDouble() * f.toDouble());
}
float16 float16::operator /(const float16 &f) {
return float16(this->toDouble() / f.toDouble());
}
float16 & float16::operator+=(const float16 &f) {
*this = this->toDouble() + f.toDouble();
return *this;
}
float16 & float16::operator-=(const float16 &f) {
*this = this->toDouble() - f.toDouble();
return *this;
}
float16 & float16::operator*=(const float16 &f) {
*this = this->toDouble() * f.toDouble();
return *this;
}
float16 & float16::operator/=(const float16 &f) {
*this = this->toDouble() / f.toDouble();
return *this;
}
//////////////////////////////////////////////////////////
//
// MATH HELPER FUNCTIONS
//
int float16::sign() {
if (_value & 0x8000)
return -1;
if (_value & 0xFFFF)
return 1;
return 0;
}
bool float16::isZero() { return ((_value & 0x7FFF) == 0x0000); }
bool float16::isNaN() {
if ((_value & 0x7C00) != 0x7C00)
return false;
if ((_value & 0x03FF) == 0x0000)
return false;
return true;
}
bool float16::isInf() { return ((_value == 0x7C00) || (_value == 0xFC00)); }
*/
//////////////////////////////////////////////////////////
//
// CORE CONVERSION
//
float f16tof32(ushort _value) {
//ushort sgn;
ushort man;
int exp;
float f;
//Debug.Log($"{_value}");
bool sgn = (_value & 0x8000) > 0;
exp = (_value & 0x7C00) >> 10;
man = (ushort)(_value & 0x03FF);
//Debug.Log($"{sgn} {exp} {man}");
// ZERO
if ((_value & 0x7FFF) == 0) {
return sgn ? -0 : 0;
}
// NAN & INF
if (exp == 0x001F) {
if (man == 0)
return sgn ? float.NegativeInfinity : float.PositiveInfinity; //-INFINITY : INFINITY;
else
return float.NaN; // NAN;
}
// SUBNORMAL/NORMAL
if (exp == 0)
f = 0;
else
f = 1;
// PROCESS MANTISSE
for (int i = 9; i >= 0; i--) {
f *= 2;
if ((man & (1 << i)) != 0)
f = f + 1;
}
//Debug.Log($"{f}");
f = f * (float)Math.Pow(2.0f, exp - 25);
if (exp == 0) {
f = f * (float)Math.Pow(2.0f, -13); // 5.96046447754e-8;
}
//Debug.Log($"{f}");
return sgn ? -f : f;
}
public static uint SingleToInt32Bits(float value) {
byte[] bytes = BitConverter.GetBytes(value);
if (BitConverter.IsLittleEndian)
Array.Reverse(bytes); // If the system is little-endian, reverse the byte order
return BitConverter.ToUInt32(bytes, 0);
}
public ushort F32ToF16__(float f) {
uint t = BitConverter.ToUInt32(BitConverter.GetBytes(f), 0);
ushort man = (ushort)((t & 0x007FFFFF) >> 12);
int exp = (int)((t & 0x7F800000) >> 23);
bool sgn = (t & 0x80000000) != 0;
// handle 0
if ((t & 0x7FFFFFFF) == 0) {
return sgn ? (ushort)0x8000 : (ushort)0x0000;
}
// denormalized float32 does not fit in float16
if (exp == 0x00) {
return sgn ? (ushort)0x8000 : (ushort)0x0000;
}
// handle infinity & NAN
if (exp == 0x00FF) {
if (man != 0)
return 0xFE00; // NAN
return sgn ? (ushort)0xFC00 : (ushort)0x7C00; // -INF : INF
}
// normal numbers
exp = exp - 127 + 15;
// overflow does not fit => INF
if (exp > 30) {
return sgn ? (ushort)0xFC00 : (ushort)0x7C00; // -INF : INF
}
// subnormal numbers
if (exp < -38) {
return sgn ? (ushort)0x8000 : (ushort)0x0000; // -0 or 0 ? just 0 ?
}
if (exp <= 0) // subnormal
{
man >>= (exp + 14);
// rounding
man++;
man >>= 1;
if (sgn)
return (ushort)(0x8000 | man);
return man;
}
// normal
// TODO rounding
exp <<= 10;
man++;
man >>= 1;
if (sgn)
return (ushort)(0x8000 | exp | man);
return (ushort)(exp | man);
}
//This function is faulty!!!!
ushort f32tof16(float f) {
//uint t = *(uint*)&f;
//uint t = (uint)BitConverter.SingleToInt32Bits(f);
uint t = SingleToInt32Bits(f);
// man bits = 10; but we keep 11 for rounding
ushort man = (ushort)((t & 0x007FFFFF) >> 12);
short exp = (short)((t & 0x7F800000) >> 23);
bool sgn = (t & 0x80000000) != 0;
// handle 0
if ((t & 0x7FFFFFFF) == 0) {
return sgn ? (ushort)0x8000 : (ushort)0x0000;
}
// denormalized float32 does not fit in float16
if (exp == 0x00) {
return sgn ? (ushort)0x8000 : (ushort)0x0000;
}
// handle infinity & NAN
if (exp == 0x00FF) {
if (man != 0)
return 0xFE00; // NAN
return sgn ? (ushort)0xFC00 : (ushort)0x7C00; // -INF : INF
}
// normal numbers
exp = (short)(exp - 127 + 15);
// overflow does not fit => INF
if (exp > 30) {
return sgn ? (ushort)0xFC00 : (ushort)0x7C00; // -INF : INF
}
// subnormal numbers
if (exp < -38) {
return sgn ? (ushort)0x8000 : (ushort)0x0000; // -0 or 0 ? just 0 ?
}
if (exp <= 0) // subnormal
{
man >>= (exp + 14);
// rounding
man++;
man >>= 1;
if (sgn)
return (ushort)(0x8000 | man);
return man;
}
// normal
// TODO rounding
exp <<= 10;
man++;
man >>= 1;
ushort uexp = (ushort)exp;
if (sgn)
return (ushort)(0x8000 | uexp | man);
return (ushort)(uexp | man);
}
// -- END OF FILE --
}
}