Skip to content

Commit c8632e1

Browse files
committed
Update structure of proj and fix some of fp16
1 parent ccabd4f commit c8632e1

24 files changed

+115
-101
lines changed

mim/include/float16.hpp

Lines changed: 0 additions & 65 deletions
This file was deleted.

mim/include/mim/float16.hpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// 16 bit floating point data structure
2+
3+
#pragma once
4+
5+
#include <cstdint>
6+
#include "internal/float16compressor.hpp"
7+
8+
namespace mim
9+
{
10+
struct float16
11+
{
12+
uint16_t value;
13+
14+
float16() = default;
15+
16+
inline float16(const float16& other) : value(other.value) {}
17+
inline explicit float16(const uint16_t other) : value(other) {}
18+
19+
inline float16& operator=(const float16& other)
20+
{
21+
value = other.value;
22+
return *this;
23+
}
24+
25+
inline explicit float16(const float other)
26+
{
27+
value = Float16Compressor::Compress(other);
28+
}
29+
30+
inline explicit operator float() const
31+
{
32+
uint16_t temp;
33+
std::memcpy(&temp, &value, sizeof(temp));
34+
return Float16Compressor::Decompress(temp);
35+
}
36+
37+
inline float ToFloat() const
38+
{
39+
uint16_t temp;
40+
std::memcpy(&temp, &value, sizeof(temp));
41+
return Float16Compressor::Decompress(temp);
42+
}
43+
44+
inline bool operator==(const float16& other) const
45+
{
46+
return value == other.value;
47+
}
48+
49+
inline bool operator!=(const float16& other) const
50+
{
51+
return value != other.value;
52+
}
53+
54+
static const float16 Zero() { return float16(static_cast<uint16_t>(0)); }
55+
};
56+
57+
// Validate that the float16 structure is packed correctly.
58+
static_assert(sizeof(float16) == sizeof(uint16_t), "float16 structure is not packed correctly.");
59+
}

mim/include/internal/float16compressor.hpp renamed to mim/include/mim/internal/float16compressor.hpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,28 @@ namespace mim
1818
uint32_t uint;
1919
};
2020

21-
static int32_t const shift = 13; // mantissa bits: 13, exponent bits: 16
22-
static int32_t const shiftSign = 16; // sign bits: 1, left shift amount: 15
21+
static int32_t constexpr shift = 13; // mantissa bits: 13, exponent bits: 16
22+
static int32_t constexpr shiftSign = 16; // sign bits: 1, left shift amount: 15
2323

24-
static int32_t const inf32 = 0x7F800000; // FP32 infinity bit pattern
25-
static int32_t const max32 = 0x477FE000; // max FP16 normal as a FP32 bit pattern
26-
static int32_t const min32 = 0x38800000; // min FP16 normal as a FP32 bit pattern
27-
static int32_t const sign32 = 0x80000000; // FP32 sign bit pattern
24+
static int32_t constexpr inf32 = 0x7F800000; // FP32 infinity bit pattern
25+
static int32_t constexpr max32 = 0x477FE000; // max FP16 normal as a FP32 bit pattern
26+
static int32_t constexpr min32 = 0x38800000; // min FP16 normal as a FP32 bit pattern
27+
static int32_t constexpr sign32 = 0x80000000; // FP32 sign bit pattern
2828

29-
static int32_t const inf16 = inf32 >> shift; // FP16 infinity bit pattern
30-
static int32_t const nan16 = (inf16 + 1) << shift; // min FP16 NaN as a FP32 bit pattern
31-
static int32_t const max16 = max32 >> shift; // max FP16 normal bit pattern
32-
static int32_t const min16 = min32 >> shift; // min FP16 normal bit pattern
33-
static int32_t const sign16 = sign32 >> shiftSign; // FP16 sign bit pattern
29+
static int32_t constexpr inf16 = inf32 >> shift; // FP16 infinity bit pattern
30+
static int32_t constexpr nan16 = (inf16 + 1) << shift; // min FP16 NaN as a FP32 bit pattern
31+
static int32_t constexpr max16 = max32 >> shift; // max FP16 normal bit pattern
32+
static int32_t constexpr min16 = min32 >> shift; // min FP16 normal bit pattern
33+
static int32_t constexpr sign16 = sign32 >> shiftSign; // FP16 sign bit pattern
3434

35-
static int32_t const mul32to16 = 0x52000000; // multiplier to convert FP32 to FP16
36-
static int32_t const mul16to32 = 0x33800000; // multiplier to convert FP16 to FP32
35+
static int32_t constexpr mul32to16 = 0x52000000; // multiplier to convert FP32 to FP16
36+
static int32_t constexpr mul16to32 = 0x33800000; // multiplier to convert FP16 to FP32
3737

38-
static int32_t const sub32 = 0x003FF; // maximum float32 subnormal value, down shifted
39-
static int32_t const nor32 = 0x00400; // minimum float32 normal value, down shifted
38+
static int32_t constexpr sub32 = 0x003FF; // maximum float32 subnormal value, down shifted
39+
static int32_t constexpr nor32 = 0x00400; // minimum float32 normal value, down shifted
4040

41-
static int32_t const maxDiff = inf16 - max16 - 1; // diff. between max FP16 normal and infinity
42-
static int32_t const minDiff = min16 - sub32 - 1; // diff. between min FP16 normal and max float32 subnormal
41+
static int32_t constexpr maxDiff = inf16 - max16 - 1; // diff. between max FP16 normal and infinity
42+
static int32_t constexpr minDiff = min16 - sub32 - 1; // diff. between min FP16 normal and max float32 subnormal
4343

4444

4545
public:
@@ -55,7 +55,7 @@ namespace mim
5555

5656
// Calculate the shifted bits of the floating-point number.
5757
shift_bits.sint = mul32to16;
58-
shift_bits.sint = (int32_t)(shift_bits.fl * value_bits.fl); // correct subnormals
58+
shift_bits.sint = static_cast<int32_t>(shift_bits.fl * value_bits.fl); // correct subnormals
5959

6060
// Adjust the floating-point number's bits based on certain conditions.
6161
value_bits.sint ^= (shift_bits.sint ^ value_bits.sint) & -(min32 > value_bits.sint);
@@ -70,7 +70,7 @@ namespace mim
7070
value_bits.sint ^= ((value_bits.sint - minDiff) ^ value_bits.sint) & -(value_bits.sint > sub32);
7171

7272
// Combine the sign and bits of the compressed number and return it.
73-
return (uint16_t)(value_bits.uint | sign_bits);
73+
return static_cast<uint16_t>(value_bits.uint | sign_bits);
7474
}
7575

7676
static float Decompress(uint16_t compressed_value)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)