@@ -11,45 +11,45 @@ Base.inttype(::Type{Posit16_1}) = Int16
11
11
Base. inttype (:: Type{Posit32} ) = Int32
12
12
13
13
# generic conversion to UInt/Int
14
# Reinterpret the bit pattern of a posit as the unsigned / signed integer type
# that `Base.uinttype` / `Base.inttype` associate with its posit type.
# No numeric conversion takes place; only the type of the bits changes.
Base.unsigned(x::T) where {T<:AbstractPosit} = reinterpret(Base.uinttype(T), x)
Base.signed(x::T) where {T<:AbstractPosit} = reinterpret(Base.inttype(T), x)
16
16
17
17
# BOOL
18
18
# For every posit format: `true` maps to one, `false` maps to zero, and
# promoting a Bool together with a posit yields the posit type.
for P in (:Posit8, :Posit16, :Posit32, :Posit16_1)
    @eval $P(x::Bool) = x ? one($P) : zero($P)
    @eval Base.promote_rule(::Type{Bool}, ::Type{$P}) = $P
end
24
24
25
25
# easier for development purposes
26
# Development helpers: build a posit directly from its raw bit pattern.
# The unsigned integer is reinterpreted, not numerically converted.
for (P, U) in ((:Posit8, :UInt8),
               (:Posit16, :UInt16),
               (:Posit16_1, :UInt16),
               (:Posit32, :UInt32))
    @eval $P(x::$U) = reinterpret($P, x)
end
30
30
31
31
# BETWEEN Posits
32
32
# upcasting: append with zeros.
33
# Widen the raw bits to the target width, then shift left so the source bits
# occupy the most significant positions; the new trailing bits are zero.
function Posit16(x::Posit8)
    widened = unsigned(x) % UInt16
    return reinterpret(Posit16, widened << 8)
end

function Posit32(x::Posit8)
    widened = unsigned(x) % UInt32
    return reinterpret(Posit32, widened << 24)
end

function Posit32(x::Posit16)
    widened = unsigned(x) % UInt32
    return reinterpret(Posit32, widened << 16)
end
36
36
37
37
# downcasting: apply round to nearest
38
# Narrowing conversions all delegate to `posit(Target, x)`, which rounds the
# wider bit pattern down to the target width.
for (Target, Source) in ((:Posit8, :Posit16),
                         (:Posit8, :Posit32),
                         (:Posit16, :Posit32))
    @eval $Target(x::$Source) = posit($Target, x)
end
41
41
42
42
# conversion to and from Posit16_1 via floats as number of exponent bits changes
43
43
# Any posit -> Posit16_1: take the detour through `float(x)`.
# NOTE(review): if Posit16_1 <: AbstractPosit this method also matches
# x::Posit16_1 -- confirm how it interacts with Base's identity constructor.
Posit16_1(x::AbstractPosit) = Posit16_1(float(x))

# Posit16_1 -> other posit formats, likewise via `float(x)`.
for Target in (:Posit8, :Posit16, :Posit32)
    @eval $Target(x::Posit16_1) = $Target(float(x))
end
47
47
48
# Convert posit `x` to the posit type `PositN1`: the raw bits of `x` are passed
# through `bitround` with the unsigned integer type of the target format, and
# the result is reinterpreted as `PositN1`.
function posit(::Type{PositN1}, x::PositN2) where {PositN1<:AbstractPosit, PositN2<:AbstractPosit}
    TargetUInt = Base.uinttype(PositN1)
    rounded_bits = bitround(TargetUInt, unsigned(x))
    return reinterpret(PositN1, rounded_bits)
end
51
51
52
- function bitround (:: Type{UIntN1} ,ui:: UIntN2 ) where {UIntN1<: Unsigned ,UIntN2<: Unsigned }
52
+ function bitround (:: Type{UIntN1} , ui:: UIntN2 ) where {UIntN1<: Unsigned , UIntN2<: Unsigned }
53
53
Δbits = bitsize (UIntN2) - bitsize (UIntN1) # difference in bits
54
54
55
55
# ROUND TO NEAREST, tie to even: create ulp/2 = ..007ff.. or ..0080..
@@ -72,36 +72,36 @@ Posit32(x::Signed) = Posit32(Float64(x))
72
72
# Posit -> Int goes through Float64: the posit is converted to Float64 first
# and that value is then converted to Int.
function Base.Int(x::AbstractPosit)
    as_float = Float64(x)
    return Int(as_float)
end
73
73
74
74
# promotions
75
# Promoting an Int together with any posit type yields that posit type.
function Base.promote_rule(::Type{Int}, ::Type{T}) where {T<:AbstractPosit}
    return T
end
76
76
77
77
# FROM FLOATS
78
# Constructors from Float16/32/64: each posit format forwards to the generic
# `posit(PositType, x)` float conversion routine.
for P in (:Posit8, :Posit16, :Posit16_1, :Posit32)
    @eval $P(x::Base.IEEEFloat) = posit($P, x)
end
82
82
83
- function posit (:: Type{PositN} ,x:: FloatN ) where {PositN<: AbstractPosit ,FloatN<: Base.IEEEFloat }
83
+ function posit (:: Type{PositN} , x:: FloatN ) where {PositN<: AbstractPosit , FloatN<: Base.IEEEFloat }
84
84
85
85
UIntN = Base. uinttype (FloatN) # unsigned integer corresponding to FloatN
86
86
IntN = Base. inttype (FloatN) # signed integer corresponding to FloatN
87
- ui = reinterpret (UIntN,x) # reinterpret input
87
+ ui = reinterpret (UIntN, x) # reinterpret input
88
88
89
89
# extract exponent bits and shift to tail, then remove bias
90
90
e = (ui & Base. exponent_mask (FloatN)) >> Base. significand_bits (FloatN)
91
- e = reinterpret (IntN,e) - IntN (Base. exponent_bias (FloatN))
91
+ e = reinterpret (IntN, e) - IntN (Base. exponent_bias (FloatN))
92
92
signbit_e = signbit (e) # sign of exponent
93
93
k = e >> Base. exponent_bits (PositN) # k-value for useed^k in posits
94
94
95
95
# ASSEMBLE POSIT REGIME, EXPONENT, MANTISSA
96
- # get posit exponent_bits and shift to starting from bitposition 3 (they'll be shifted in later)
97
- exponent_bits = e & Base. exponent_mask (PositN)
96
+ # get posit exponent_bits and shift to starting from bitposition 3 (they'll be shifted in later)
97
+ exponent_bits = signed ( e & Base. exponent_mask (PositN) )
98
98
exponent_bits <<= bitsize (FloatN)- 2 - Base. exponent_bits (PositN)
99
99
100
100
# create 01000... (for |x|<1) or 10000... (|x| > 1)
101
- regime_bits = reinterpret (IntN,Base. sign_mask (FloatN) >> signbit_e)
101
+ regime_bits = reinterpret (IntN, Base. sign_mask (FloatN) >> signbit_e)
102
102
103
103
# extract mantissa bits and push to behind exponent rre..emm... (regime still hasn't been shifted)
104
- mantissa = reinterpret (IntN,ui & Base. significand_mask (FloatN))
104
+ mantissa = reinterpret (IntN, ui & Base. significand_mask (FloatN))
105
105
mantissa <<= Base. exponent_bits (FloatN) - Base. exponent_bits (PositN) - 1
106
106
107
107
# combine regime, exponent, mantissa and arithmetic bitshift for 11..110em or 00..001em
@@ -110,14 +110,15 @@ function posit(::Type{PositN},x::FloatN) where {PositN<:AbstractPosit,FloatN<:Ba
110
110
regime_exponent_mantissa &= ~ Base. sign_mask (FloatN) # remove possible sign bit from arith shift
111
111
112
112
# round the result to nearest
113
- p_rounded = bitround (Base. uinttype (PositN),reinterpret (UIntN, regime_exponent_mantissa))
113
+ p_rounded = bitround (Base. uinttype (PositN), unsigned ( regime_exponent_mantissa))
114
114
115
115
# rounding mode without underflow or overflow
116
116
max_k = (Base. exponent_bias (FloatN) >> Base. exponent_bits (PositN)) + 1
117
117
p_rounded -= Base. inttype (PositN)(sign (k)* (bitsize (PositN) <= abs (k) < max_k))
118
118
119
119
p_rounded = signbit (x) ? - p_rounded : p_rounded # two's complement for negative numbers
120
- return reinterpret (PositN,p_rounded)
120
+
121
+ return reinterpret (PositN, p_rounded)
121
122
end
122
123
123
124
# # TO FLOATS
@@ -134,12 +135,12 @@ Base.Float64(x::AbstractPosit) = float(Float64,x)
134
135
135
136
# The dynamic range of Float16 is smaller than Posit8/16/32
136
137
# for correct rounding convert first to Float32/64
137
# Convert to a wider float first (Float32 for the 8/16-bit posits, Float64 for
# Posit32) and only then narrow the result to Float16.
for (P, Wide) in ((:Posit8, :Float32),
                  (:Posit16, :Float32),
                  (:Posit16_1, :Float32),
                  (:Posit32, :Float64))
    @eval Base.Float16(x::$P) = Float16(float($Wide, x))
end
141
142
142
- function Base. float (:: Type{FloatN} ,x:: PositN ) where {FloatN<: Base.IEEEFloat ,PositN<: AbstractPosit }
143
+ function Base. float (:: Type{FloatN} , x:: PositN ) where {FloatN<: Base.IEEEFloat , PositN<: AbstractPosit }
143
144
144
145
UIntN = Base. uinttype (FloatN) # corresponding UInt for floattype
145
146
n_bits = bitsize (PositN) # number of bits in posit format
@@ -164,20 +165,20 @@ function Base.float(::Type{FloatN},x::PositN) where {FloatN<:Base.IEEEFloat,Posi
164
165
165
166
# ASSEMBLE FLOAT EXPONENT
166
167
# useed^k * 2^e = 2^(2^n_exponent_bits*k+e), ie get k-value from number of regime bits,
167
- # << n_exponent_bits for *2^exponent_bits, add exponent bits and Float exponent bias (=15,127,1023)
168
+ # << n_exponent_bits for *2^exponent_bits, add exponent bits and Float exponent bias (=15,127,1023)
168
169
k = (- 1 + 2 sign_exponent)* n_regimebits - sign_exponent
169
170
exponent = ((k << Base. exponent_bits (PositN)) + exponent_bits + Base. exponent_bias (FloatN)) % UIntN
170
171
exponent <<= Base. significand_bits (FloatN)
171
172
172
173
# set exponent (and 1st mantissa bit) to NaN for NaR inputs
173
174
# set exponent to 0 for zero(Posit8) input
174
- nan_ui = reinterpret (UIntN,nan (FloatN))
175
+ nan_ui = reinterpret (UIntN, nan (FloatN))
175
176
exponent = n_regimebits == n_bits ? (signbitx ? nan_ui : zero (exponent)) : exponent
176
177
177
178
# assemble sign, exponent and mantissa bits
178
179
sign = signbitx* Base. sign_mask (FloatN)
179
180
f = sign | exponent | mantissa # concatenate sign, exponent and mantissa
180
- return reinterpret (FloatN,f)
181
+ return reinterpret (FloatN, f)
181
182
end
182
183
183
184
# BIGFLOAT
0 commit comments