Skip to content

Commit 4602764

Browse files
committed
update
1 parent a35b5f9 commit 4602764

File tree

1 file changed

+26
-13
lines changed

1 file changed

+26
-13
lines changed

src/compressed_tensors/quantization/quant_scheme.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -163,46 +163,59 @@ def is_preset_scheme(name: str) -> bool:
163163
)
164164
)
165165

# NVFP4 preset: 4-bit float (FP4) weights AND input activations.
# Both use TENSOR_GROUP scaling over 16-element groups with a static
# min/max observer; activations additionally use DynamicType.LOCAL
# (per the diff: locally dynamic activation scales, static global scale).
NVFP4 = dict(
    weights=QuantizationArgs(
        num_bits=4,
        type=QuantizationType.FLOAT,
        strategy=QuantizationStrategy.TENSOR_GROUP,
        symmetric=True,
        dynamic=False,
        group_size=16,
        observer="static_minmax",
    ),
    input_activations=QuantizationArgs(
        num_bits=4,
        type=QuantizationType.FLOAT,
        strategy=QuantizationStrategy.TENSOR_GROUP,
        symmetric=True,
        # NOTE(review): LOCAL dynamic — scales computed per-group at runtime
        # while the observer supplies the static global scale; confirm against
        # DynamicType docs in this package.
        dynamic=DynamicType.LOCAL,
        group_size=16,
        observer="static_minmax",
    ),
)
187+
# MXFP4A16 preset: 4-bit float (MXFP4) weights with 16-bit (unquantized)
# activations — weight-only quantization. GROUP strategy over 32-element
# blocks, matching the MX block size; static (dynamic=False) scales.
MXFP4A16 = dict(
    weights=QuantizationArgs(
        num_bits=4,
        type=QuantizationType.FLOAT,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        dynamic=False,
        group_size=32,
    )
)
184198

185-
NVFP4 = dict(
199+
MXFP4 = dict(
186200
weights=QuantizationArgs(
187201
num_bits=4,
188202
type=QuantizationType.FLOAT,
189-
strategy=QuantizationStrategy.TENSOR_GROUP,
203+
strategy=QuantizationStrategy.GROUP,
190204
symmetric=True,
191205
dynamic=False,
192-
group_size=16,
193-
observer="static_minmax",
206+
group_size=32,
194207
),
195208
input_activations=QuantizationArgs(
196209
num_bits=4,
197210
type=QuantizationType.FLOAT,
198-
strategy=QuantizationStrategy.TENSOR_GROUP,
211+
strategy=QuantizationStrategy.GROUP,
212+
dynamic=True,
199213
symmetric=True,
200-
dynamic=DynamicType.LOCAL,
201-
group_size=16,
202-
observer="static_minmax",
214+
group_size=32,
203215
),
204216
)
205217

218+
206219
# 8 bit integer weights and 8 bit activations quantization
207220
INT8_W8A8 = dict(
208221
weights=QuantizationArgs(

0 commit comments

Comments (0)