Skip to content

Commit f4ca510

Browse files
committed
cleanup
Signed-off-by: Yuan Yao <[email protected]>
1 parent 0192fe7 commit f4ca510

12 files changed

+100
-96
lines changed

docs/Operators.md

+37-29
Original file line numberDiff line numberDiff line change
@@ -3747,7 +3747,9 @@ for from_type, to_type in test_cases:
37473747
raise ValueError(
37483748
f"Conversion from {from_type} to {to_type} is not tested."
37493749
)
3750-
expected = vect_evaluate_float4e2m1_from_bits(subbyte.float32_to_float4e2m1_unpacked(np_fp32))
3750+
expected = vect_evaluate_float4e2m1_from_bits(
3751+
subbyte.float32_to_float4e2m1_unpacked(np_fp32)
3752+
)
37513753
output = make_tensor(
37523754
"y", getattr(TensorProto, to_type), input_shape, expected.tolist()
37533755
)
@@ -20767,45 +20769,33 @@ expect(
2076720769

2076820770

2076920771
<details>
20770-
<summary>e2m1</summary>
20772+
<summary>e4m3fn</summary>
2077120773

2077220774
```python
2077320775
node = onnx.helper.make_node(
2077420776
"QuantizeLinear",
2077520777
inputs=["x", "y_scale", "y_zero_point"],
2077620778
outputs=["y"],
20777-
axis=0,
2077820779
)
2077920780

20780-
x = np.array(
20781-
[
20782-
[0.0, 2.5, 4.8, 8.6],
20783-
[-30, -20, 6, 9],
20784-
[-0.0, -2.5, -4.8, -8.6],
20785-
]
20786-
).astype(np.float32)
20787-
20788-
y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
20789-
y_zero_point = make_tensor(
20790-
"y_zero_point", TensorProto.FLOAT4E2M1, y_scale.shape, np.zeros_like(y_scale)
20791-
)
20792-
y = make_tensor(
20793-
"y", TensorProto.FLOAT4E2M1, x.shape, [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2]
20794-
)
20781+
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
20782+
y_scale = np.float32(2)
20783+
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
20784+
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
2079520785

2079620786
expect(
2079720787
node,
2079820788
inputs=[x, y_scale, y_zero_point],
2079920789
outputs=[y],
20800-
name="test_quantizelinear_float4e2m1",
20790+
name="test_quantizelinear_e4m3fn",
2080120791
)
2080220792
```
2080320793

2080420794
</details>
2080520795

2080620796

2080720797
<details>
20808-
<summary>e4m3fn</summary>
20798+
<summary>e5m2</summary>
2080920799

2081020800
```python
2081120801
node = onnx.helper.make_node(
@@ -20816,40 +20806,58 @@ node = onnx.helper.make_node(
2081620806

2081720807
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
2081820808
y_scale = np.float32(2)
20819-
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
20820-
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
20809+
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
20810+
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
2082120811

2082220812
expect(
2082320813
node,
2082420814
inputs=[x, y_scale, y_zero_point],
2082520815
outputs=[y],
20826-
name="test_quantizelinear_e4m3fn",
20816+
name="test_quantizelinear_e5m2",
2082720817
)
2082820818
```
2082920819

2083020820
</details>
2083120821

2083220822

2083320823
<details>
20834-
<summary>e5m2</summary>
20824+
<summary>float4e2m1</summary>
2083520825

2083620826
```python
2083720827
node = onnx.helper.make_node(
2083820828
"QuantizeLinear",
2083920829
inputs=["x", "y_scale", "y_zero_point"],
2084020830
outputs=["y"],
20831+
axis=0,
2084120832
)
2084220833

20843-
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
20844-
y_scale = np.float32(2)
20845-
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
20846-
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
20834+
x = np.array(
20835+
[
20836+
[0.0, 2.5, 4.8, 8.6],
20837+
[-30, -20, 6, 9],
20838+
[-0.0, -2.5, -4.8, -8.6],
20839+
]
20840+
).astype(np.float32)
20841+
20842+
y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
20843+
y_zero_point = make_tensor(
20844+
"y_zero_point",
20845+
TensorProto.FLOAT4E2M1,
20846+
y_scale.shape,
20847+
np.zeros_like(y_scale),
20848+
)
20849+
y = make_tensor(
20850+
"y",
20851+
TensorProto.FLOAT4E2M1,
20852+
x.shape,
20853+
[0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
20854+
)
2084720855

2084820856
expect(
2084920857
node,
2085020858
inputs=[x, y_scale, y_zero_point],
2085120859
outputs=[y],
20852-
name="test_quantizelinear_e5m2",
20860+
name="test_quantizelinear_float4e2m1",
2085320861
)
2085420862
```
2085520863

docs/TestCoverage.md

+37-29
Original file line numberDiff line numberDiff line change
@@ -2616,7 +2616,9 @@ for from_type, to_type in test_cases:
26162616
raise ValueError(
26172617
f"Conversion from {from_type} to {to_type} is not tested."
26182618
)
2619-
expected = vect_evaluate_float4e2m1_from_bits(subbyte.float32_to_float4e2m1_unpacked(np_fp32))
2619+
expected = vect_evaluate_float4e2m1_from_bits(
2620+
subbyte.float32_to_float4e2m1_unpacked(np_fp32)
2621+
)
26202622
output = make_tensor(
26212623
"y", getattr(TensorProto, to_type), input_shape, expected.tolist()
26222624
)
@@ -14184,43 +14186,31 @@ expect(
1418414186

1418514187
</details>
1418614188
<details>
14187-
<summary>e2m1</summary>
14189+
<summary>e4m3fn</summary>
1418814190

1418914191
```python
1419014192
node = onnx.helper.make_node(
1419114193
"QuantizeLinear",
1419214194
inputs=["x", "y_scale", "y_zero_point"],
1419314195
outputs=["y"],
14194-
axis=0,
1419514196
)
1419614197

14197-
x = np.array(
14198-
[
14199-
[0.0, 2.5, 4.8, 8.6],
14200-
[-30, -20, 6, 9],
14201-
[-0.0, -2.5, -4.8, -8.6],
14202-
]
14203-
).astype(np.float32)
14204-
14205-
y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
14206-
y_zero_point = make_tensor(
14207-
"y_zero_point", TensorProto.FLOAT4E2M1, y_scale.shape, np.zeros_like(y_scale)
14208-
)
14209-
y = make_tensor(
14210-
"y", TensorProto.FLOAT4E2M1, x.shape, [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2]
14211-
)
14198+
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
14199+
y_scale = np.float32(2)
14200+
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
14201+
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
1421214202

1421314203
expect(
1421414204
node,
1421514205
inputs=[x, y_scale, y_zero_point],
1421614206
outputs=[y],
14217-
name="test_quantizelinear_float4e2m1",
14207+
name="test_quantizelinear_e4m3fn",
1421814208
)
1421914209
```
1422014210

1422114211
</details>
1422214212
<details>
14223-
<summary>e4m3fn</summary>
14213+
<summary>e5m2</summary>
1422414214

1422514215
```python
1422614216
node = onnx.helper.make_node(
@@ -14231,38 +14221,56 @@ node = onnx.helper.make_node(
1423114221

1423214222
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
1423314223
y_scale = np.float32(2)
14234-
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
14235-
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
14224+
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
14225+
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
1423614226

1423714227
expect(
1423814228
node,
1423914229
inputs=[x, y_scale, y_zero_point],
1424014230
outputs=[y],
14241-
name="test_quantizelinear_e4m3fn",
14231+
name="test_quantizelinear_e5m2",
1424214232
)
1424314233
```
1424414234

1424514235
</details>
1424614236
<details>
14247-
<summary>e5m2</summary>
14237+
<summary>float4e2m1</summary>
1424814238

1424914239
```python
1425014240
node = onnx.helper.make_node(
1425114241
"QuantizeLinear",
1425214242
inputs=["x", "y_scale", "y_zero_point"],
1425314243
outputs=["y"],
14244+
axis=0,
1425414245
)
1425514246

14256-
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
14257-
y_scale = np.float32(2)
14258-
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
14259-
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
14247+
x = np.array(
14248+
[
14249+
[0.0, 2.5, 4.8, 8.6],
14250+
[-30, -20, 6, 9],
14251+
[-0.0, -2.5, -4.8, -8.6],
14252+
]
14253+
).astype(np.float32)
14254+
14255+
y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
14256+
y_zero_point = make_tensor(
14257+
"y_zero_point",
14258+
TensorProto.FLOAT4E2M1,
14259+
y_scale.shape,
14260+
np.zeros_like(y_scale),
14261+
)
14262+
y = make_tensor(
14263+
"y",
14264+
TensorProto.FLOAT4E2M1,
14265+
x.shape,
14266+
[0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
14267+
)
1426014268

1426114269
expect(
1426214270
node,
1426314271
inputs=[x, y_scale, y_zero_point],
1426414272
outputs=[y],
14265-
name="test_quantizelinear_e5m2",
14273+
name="test_quantizelinear_float4e2m1",
1426614274
)
1426714275
```
1426814276

docs/docsgen/source/technical/float4.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ The float value is defined by the following expressions:
5757
:header-rows: 1
5858
5959
* -
60-
- E4M3FN
60+
- E2M1
6161
* - exponent :math:`\neq` 0
6262
- :math:`(-1)^S 2^{\sum_{i=1}^2 b_i 2^{i-1} - 1} \left( 1 + b_0 2^{-1} \right)`
6363
* - exponent :math:`=` 0
@@ -108,8 +108,8 @@ The behavior for downcasting to float 4 is summarized below
108108
## Packing and Unpacking
109109

110110
Float4 is stored as 2x4bit in a single byte.
111-
The first element is stored in the 4 LSB and the second element is stored in the 4 MSB.
112-
i.e. for elements x, y, that are consecutive elements in the array:
111+
The first element is stored in the 4 LSB and the second element is stored in the 4 MSB,
112+
i.e. for elements `x` and `y` that are consecutive elements in the array:
113113
```
114114
pack(x,y): y << 4 | x & 0x0F
115115
unpack(z): x = z & 0x0F, y = z >> 4

onnx/_custom_element_types.py

+3
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
#: than its onnx size.
5353
int4 = np.dtype((np.int8, {"int4": (np.int8, 0)}))
5454

55+
#: Defines float 4 e2m1 type, see :ref:`onnx-detail-float4` for technical details.
56+
#: Do note that one float4 element is stored using a byte and therefore is twice as big
57+
#: as its onnx size.
5558
float4e2m1 = np.dtype((np.uint8, {"float4e2m1": (np.uint8, 0)}))
5659

5760
mapping_name_to_data_type = {

onnx/backend/test/case/node/quantizelinear.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def export_int4() -> None:
277277
)
278278

279279
@staticmethod
280-
def export_e2m1() -> None:
280+
def export_float4e2m1() -> None:
281281
node = onnx.helper.make_node(
282282
"QuantizeLinear",
283283
inputs=["x", "y_scale", "y_zero_point"],

onnx/helper.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -673,15 +673,14 @@ def pack_float32_to_4bit(array: np.ndarray | Sequence, signed: bool) -> np.ndarr
673673

674674

675675
def pack_float32_to_float4e2m1(array: np.ndarray | Sequence) -> np.ndarray:
676-
"""Convert an array of float32 value to a 4bit data-type and pack every two concecutive elements in a byte.
677-
See :ref:`onnx-detail-int4` for technical details.
676+
"""Convert an array of float32 value to float4e2m1 and pack every two concecutive elements in a byte.
677+
See :ref:`onnx-detail-float4` for technical details.
678678
679679
Args:
680680
array: array of float to convert and pack
681-
signed: Whether the 4 bit variant is signed or unsigned
682681
683682
Returns:
684-
Packed array with size `ceil(farray.size/2)` (single dimension).
683+
Packed array of float4e2m1 (as uint8) with size `ceil(farray.size/2)` (single dimension).
685684
"""
686685
if not isinstance(array, np.ndarray):
687686
array = np.asarray(array, dtype=np.float32)
@@ -757,7 +756,6 @@ def make_tensor(
757756
data_type in (TensorProto.UINT4, TensorProto.INT4, TensorProto.FLOAT4E2M1)
758757
and len(vals) == expected_size + 0.5
759758
):
760-
print("$$$$$$", data_type, vals, len(vals), expected_size, dims)
761759
raise ValueError(
762760
f"Number of values does not match tensor's size. Expected {expected_size}, but it is {len(vals)}. "
763761
)

onnx/numpy_helper.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def unpack_int4(
221221
return res
222222

223223

224-
def evaluate_float4e2m1_from_bits(x):
224+
def evaluate_float4e2m1_from_bits(x: np.uint8) -> np.float32:
225225
"""Evaluate the numerical value of a single float4e2m1 element represented as uint8
226226
See :ref:`onnx-detail-float4` for technical details.
227227
@@ -232,7 +232,6 @@ def evaluate_float4e2m1_from_bits(x):
232232
The float32 value encoded by the 4 least-significant bits of `x`.
233233
"""
234234
# x is stored in 4 LSB of int
235-
# assert(isinstance(x, np.uint8))
236235
S = -1 if bool(x & 0x08) else 1
237236
M = x & 0x01
238237
E = (x & 0x06) >> 1
@@ -509,8 +508,8 @@ def to_array(tensor: TensorProto, base_dir: str = "") -> np.ndarray:
509508
data = tensor.int32_data
510509
shape = tuple(tensor.dims)
511510

512-
# 2 packed fp4e2m1 elements must be represented as a single uint8 value.
513-
# Therefore, y is np.uint8 (not the dtype to which the int4 maps)
511+
# 2 packed float4e2m1 elements must be represented as a single uint8 value.
512+
# Therefore, y is np.uint8.
514513
y = np.empty(len(data), dtype=custom_np_types.float4e2m1).ravel() # type: ignore[assignment]
515514
for i, d in enumerate(data):
516515
y[i] = d

onnx/reference/ops/op_cast.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,8 @@ def cast_to(x, to, saturate): # noqa: PLR0911
139139
return res.astype(np.float16)
140140

141141
if to == TensorProto.FLOAT4E2M1:
142-
xf = x.astype(np.float32).ravel()
143-
y = np.empty(xf.shape, dtype=float4e2m1).ravel()
144-
for i in range(y.shape[0]):
145-
el = subbyte.float32_to_float4e2m1_unpacked(xf[i])
146-
y[i] = el
147-
# This operator preduces a tensor with the same shape for INT4.
142+
xf = x.astype(np.float32)
143+
y = subbyte.float32_to_float4e2m1_unpacked(xf)
148144
return y.reshape(x.shape)
149145

150146
if to == TensorProto.STRING:

onnx/reference/ops/op_dequantize_linear.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,8 @@ def _run(
9393
elif x_type == TensorProto.FLOAT8E5M2FNUZ:
9494
dx = float8e5m2_to_float32(x, fn=True, uz=True)
9595
elif x_type == TensorProto.FLOAT4E2M1:
96-
x_shape = x.shape
97-
dx = np.empty(x.shape, dtype=np.float32).ravel()
98-
xr = x.ravel()
99-
for i in range(x.flatten().size):
100-
dx[i] = evaluate_float4e2m1_from_bits(xr[i])
101-
dx.reshape(x_shape)
96+
evaluate_func = np.vectorize(evaluate_float4e2m1_from_bits)
97+
dx = evaluate_func(x)
10298
else:
10399
dx = x.astype(np.float32)
104100
y = dx * reshape_input(x_scale, x.shape, axis, block_size)

onnx/reference/ops/op_quantize_linear.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,8 @@ def _run(
207207
return (i4,) # type: ignore[attr-defined]
208208

209209
if tensor_type == TensorProto.FLOAT4E2M1:
210-
# x += zero_point
211-
def single_func(x):
212-
return subbyte.float32_to_float4e2m1_unpacked(x)
213-
214-
func = np.vectorize(single_func)
210+
x += zero_point
211+
func = np.vectorize(subbyte.float32_to_float4e2m1_unpacked)
215212
f4 = func(x)
216213
return (f4,) # type: ignore[attr-defined]
217214

0 commit comments

Comments
 (0)