diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c8f19a..ed52791 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,17 +23,20 @@ else() FetchContent_MakeAvailable(googletest) include_directories(.) - add_library(LinearAlgebra STATIC - "FloatSingle.cpp" - "Angle.cpp" - "Vector2.cpp" - "Vector3.cpp" - "Quaternion.cpp" - "Polar.cpp" - "Spherical.cpp" - "Matrix.cpp" - "SwingTwist.cpp" - "Direction.cpp" + file(GLOB srcs + *.cpp + ) + add_library(LinearAlgebra STATIC ${srcs} + # "FloatSingle.cpp" + # "Angle.cpp" + # "Vector2.cpp" + # "Vector3.cpp" + # "Quaternion.cpp" + # "Polar.cpp" + # "Spherical.cpp" + # "Matrix.cpp" + # "SwingTwist.cpp" + # "Direction.cpp" ) enable_testing() diff --git a/float16.cpp b/float16.cpp new file mode 100644 index 0000000..041b3d3 --- /dev/null +++ b/float16.cpp @@ -0,0 +1,250 @@ +// +// FILE: float16.cpp +// AUTHOR: Rob Tillaart +// VERSION: 0.1.8 +// PURPOSE: library for Float16s for Arduino +// URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format + +#include "float16.h" +// #include +#include + +// CONSTRUCTOR +float16::float16(float f) { _value = f32tof16(f); } + +// PRINTING +// size_t float16::printTo(Print& p) const +// { +// double d = this->f16tof32(_value); +// return p.print(d, _decimals); +// } + +float float16::toFloat() const { return f16tof32(_value); } + +////////////////////////////////////////////////////////// +// +// EQUALITIES +// +bool float16::operator==(const float16 &f) { return (_value == f._value); } + +bool float16::operator!=(const float16 &f) { return (_value != f._value); } + +bool float16::operator>(const float16 &f) { + if ((_value & 0x8000) && (f._value & 0x8000)) + return _value < f._value; + if (_value & 0x8000) + return false; + if (f._value & 0x8000) + return true; + return _value > f._value; +} + +bool float16::operator>=(const float16 &f) { + if ((_value & 0x8000) && (f._value & 0x8000)) + return _value <= f._value; + if (_value & 0x8000) + return false; + if (f._value & 0x8000) + return true; + return _value >= f._value; +} + +bool float16::operator<(const float16 &f) { + if ((_value & 0x8000) && (f._value & 0x8000)) + return _value > f._value; + if (_value & 0x8000) + return true; + if (f._value & 0x8000) + return false; + return _value < f._value; +} + +bool float16::operator<=(const float16 &f) { + if ((_value & (uint16_t)0x8000) && (f._value & (uint16_t)0x8000)) + return _value >= f._value; + if (_value & 0x8000) + return true; + if (f._value & 0x8000) + return false; + return _value <= f._value; +} + +////////////////////////////////////////////////////////// +// +// NEGATION +// +float16 float16::operator-() { + float16 f16; + f16.setBinary(_value ^ 0x8000); + return f16; +} + +////////////////////////////////////////////////////////// +// +// MATH +// +float16 float16::operator+(const float16 &f) { + return float16(this->toFloat() + f.toFloat()); +} + +float16 float16::operator-(const float16 &f) { + return float16(this->toFloat() - f.toFloat()); +} + +float16 float16::operator*(const float16 &f) { + return float16(this->toFloat() * f.toFloat()); +} + +float16 float16::operator/(const float16 &f) { + return float16(this->toFloat() / f.toFloat()); +} + +float16 &float16::operator+=(const float16 &f) { + *this = this->toFloat() + f.toFloat(); + return *this; +} + +float16 &float16::operator-=(const float16 &f) { + *this = this->toFloat() - f.toFloat(); + return *this; +} + +float16 &float16::operator*=(const float16 &f) { + *this = this->toFloat() * f.toFloat(); + return *this; +} + +float16 &float16::operator/=(const float16 &f) { + *this = this->toFloat() / f.toFloat(); + return *this; +} + +////////////////////////////////////////////////////////// +// +// MATH HELPER FUNCTIONS +// +int float16::sign() { + if (_value & 0x8000) + return -1; + if (_value & 0xFFFF) + return 1; + return 0; +} + +bool float16::isZero() { return ((_value & 0x7FFF) == 0x0000); } + +bool float16::isNaN() { + if ((_value & 0x7C00) != 0x7C00) + return false; + if ((_value & 0x03FF) == 0x0000) + return false; + return true; +} + +bool float16::isInf() { return ((_value == 0x7C00) || (_value == 0xFC00)); } + +////////////////////////////////////////////////////////// +// +// CORE CONVERSION +// +float float16::f16tof32(uint16_t _value) const { + uint16_t sgn, man; + int exp; + float f; + + sgn = (_value & 0x8000) > 0; + exp = (_value & 0x7C00) >> 10; + man = (_value & 0x03FF); + + // ZERO + if ((_value & 0x7FFF) == 0) { + return sgn ? -0.0f : 0.0f; + } + // NAN & INF + if (exp == 0x001F) { + if (man == 0) + return sgn ? -INFINITY : INFINITY; + else + return NAN; + } + + // SUBNORMAL/NORMAL + if (exp == 0) + f = 0; + else + f = 1; + + // PROCESS MANTISSE + for (int i = 9; i >= 0; i--) { + f *= 2; + if (man & (1 << i)) + f = f + 1; + } + f = f * powf(2.0f, (float)(exp - 25)); + if (exp == 0) { + f = f * powf(2.0f, -13); // 5.96046447754e-8; + } + return sgn ? -f : f; +} + +uint16_t float16::f32tof16(float f) const { + // untested code, but will avoid strict aliasing warning + // union { + // float f; + // uint32_t t; + // } u; + // u.f = f; + // uint32_t t = u.t; + uint32_t t = *(uint32_t *)&f; + // man bits = 10; but we keep 11 for rounding + uint16_t man = (t & 0x007FFFFF) >> 12; + int16_t exp = (t & 0x7F800000) >> 23; + bool sgn = (t & 0x80000000); + + // handle 0 + if ((t & 0x7FFFFFFF) == 0) { + return sgn ? 0x8000 : 0x0000; + } + // denormalized float32 does not fit in float16 + if (exp == 0x00) { + return sgn ? 0x8000 : 0x0000; + } + // handle infinity & NAN + if (exp == 0x00FF) { + if (man) + return 0xFE00; // NAN + return sgn ? 0xFC00 : 0x7C00; // -INF : INF + } + + // normal numbers + exp = exp - 127 + 15; + // overflow does not fit => INF + if (exp > 30) { + return sgn ? 0xFC00 : 0x7C00; // -INF : INF + } + // subnormal numbers + if (exp < -38) { + return sgn ? 0x8000 : 0x0000; // -0 or 0 ? just 0 ? + } + if (exp <= 0) // subnormal + { + man >>= (exp + 14); + // rounding + man++; + man >>= 1; + if (sgn) + return 0x8000 | man; + return man; + } + + // normal + // TODO rounding + exp <<= 10; + man++; + man >>= 1; + if (sgn) + return 0x8000 | exp | man; + return exp | man; +} + +// -- END OF FILE -- diff --git a/float16.h b/float16.h new file mode 100644 index 0000000..0a95346 --- /dev/null +++ b/float16.h @@ -0,0 +1,74 @@ +#pragma once +// +// FILE: float16.h +// AUTHOR: Rob Tillaart +// VERSION: 0.1.8 +// PURPOSE: Arduino library to implement float16 data type. +// half-precision floating point format, +// used for efficient storage and transport. +// URL: https://github.com/RobTillaart/float16 + +#include + +#define FLOAT16_LIB_VERSION (F("0.1.8")) + +// typedef uint16_t _fp16; + +class float16 { +public: + // Constructors + float16(void) { _value = 0x0000; }; + float16(float f); + float16(const float16 &f) { _value = f._value; }; + + // Conversion + float toFloat(void) const; + // access the 2 byte representation. + uint16_t getBinary() { return _value; }; + void setBinary(uint16_t u) { _value = u; }; + + // Printable + // size_t printTo(Print &p) const; + void setDecimals(uint8_t d) { _decimals = d; }; + uint8_t getDecimals() { return _decimals; }; + + // equalities + bool operator==(const float16 &f); + bool operator!=(const float16 &f); + + bool operator>(const float16 &f); + bool operator>=(const float16 &f); + bool operator<(const float16 &f); + bool operator<=(const float16 &f); + + // negation + float16 operator-(); + + // basic math + float16 operator+(const float16 &f); + float16 operator-(const float16 &f); + float16 operator*(const float16 &f); + float16 operator/(const float16 &f); + + float16 &operator+=(const float16 &f); + float16 &operator-=(const float16 &f); + float16 &operator*=(const float16 &f); + float16 &operator/=(const float16 &f); + + // math helper functions + int sign(); // 1 = positive 0 = zero -1 = negative. + bool isZero(); + bool isNaN(); + bool isInf(); + + // CORE CONVERSION + // should be private but for testing... + float f16tof32(uint16_t) const; + uint16_t f32tof16(float) const; + +private: + uint8_t _decimals = 4; + uint16_t _value; +}; + +// -- END OF FILE --