Tweak comments to use "compact/swizzled format"

timmoon10 · timmoon10 · commit 52ce3a4b0ad1 · 2025-12-06T02:53:47.000Z
Signed-off-by: Tim Moon <tmoon@nvidia.com>
diff --git a/transformer_engine/common/cast/mxfp8/dequantize_mxfp8.cuh b/transformer_engine/common/cast/mxfp8/dequantize_mxfp8.cuh
@@ -240,7 +240,7 @@ inline void dequantize(const Tensor &input, Tensor *output, cudaStream_t stream)
     NVTE_CHECK(is_fp8_dtype(input.columnwise_data.dtype), "Input must have FP8 type.");
   }
 
-  NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact layout.");
+  NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact format.");
   NVTE_CHECK(!is_fp8_dtype(output->data.dtype), "Output must be in higher precision.");
   NVTE_CHECK(output->data.shape == input.data.shape, "Input and output shapes need to match.");
 
diff --git a/transformer_engine/common/cast/mxfp8/gated_mxfp8.cuh b/transformer_engine/common/cast/mxfp8/gated_mxfp8.cuh
@@ -686,7 +686,7 @@ void quantize_gated(const Tensor &gated_input, const Tensor &grad, Tensor *outpu
   if (USE_COLWISE_SCALING) {
     NVTE_CHECK(output->columnwise_scale_inv.dptr != nullptr, "Scaling tensor must be allocated.");
   }
-  NVTE_CHECK(!output->with_gemm_swizzled_scales, "Scaling tensor must be in compact layout.");
+  NVTE_CHECK(!output->with_gemm_swizzled_scales, "Scaling tensor must be in compact format.");
 
   ScalingType scaling_type;
   if (USE_ROWWISE_SCALING && (!USE_COLWISE_SCALING)) {
diff --git a/transformer_engine/common/cast/mxfp8/quantize_mxfp8.cuh b/transformer_engine/common/cast/mxfp8/quantize_mxfp8.cuh
@@ -551,7 +551,7 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,
   NVTE_CHECK(input.has_data(), "Cannot quantize tensor without rowwise data.");
   NVTE_CHECK(is_fp8_dtype(output->dtype()), "Output must have FP8 type.");
   NVTE_CHECK(!output->with_gemm_swizzled_scales,
-             "Output must have scales in compact layout.");
+             "Output must have scales in compact format.");
   if (use_rowwise_scaling) {
     NVTE_CHECK(output->scale_inv.dptr != nullptr, "Scaling tensor must be allocated");
   }
diff --git a/transformer_engine/common/cast/nvfp4/dequantize_nvfp4.cuh b/transformer_engine/common/cast/nvfp4/dequantize_nvfp4.cuh
@@ -80,7 +80,7 @@ inline void dequantize(const Tensor &input, Tensor *output, cudaStream_t stream)
   CheckInputTensor(input, "input");
   CheckOutputTensor(*output, "output");
   NVTE_CHECK(input.data.dtype == DType::kFloat4E2M1, "Input must have FP4 type.");
-  NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact layout.");
+  NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact format.");
   NVTE_CHECK(is_high_precision_dtype(output->data.dtype), "Output must be in higher precision.");
   NVTE_CHECK(output->data.shape == input.data.shape, "Input and output shapes need to match.");
 
diff --git a/transformer_engine/common/cast/nvfp4/quantize_nvfp4.cuh b/transformer_engine/common/cast/nvfp4/quantize_nvfp4.cuh
@@ -559,7 +559,7 @@ inline void quantize(const Tensor &input, const Tensor *noop, Tensor *output, cu
   NVTE_CHECK(is_fp4_dtype(output->data.dtype), "Output must have FP4 type.");
   NVTE_CHECK(output->scale_inv.dptr != nullptr, "Scaling tensor must be allocated");
   NVTE_CHECK(!output->with_gemm_swizzled_scales,
-             "Output must have scales in compact layout.");
+             "Output must have scales in compact format.");
 
   bool use_colwise_scaling = output->has_columnwise_data();
   if (use_colwise_scaling) {
diff --git a/transformer_engine/common/cast/nvfp4/quantize_transpose_nvfp4.cuh b/transformer_engine/common/cast/nvfp4/quantize_transpose_nvfp4.cuh
@@ -1180,7 +1180,7 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output,
   NVTE_CHECK(is_fp4_dtype(output->data.dtype), "Output must have FP4 type.");
   NVTE_CHECK(output->scale_inv.dptr != nullptr, "Scaling tensor must be allocated");
   NVTE_CHECK(!output->with_gemm_swizzled_scales,
-             "Output must have scales in compact layout.");
+             "Output must have scales in compact format.");
   if (return_transpose) {
     NVTE_CHECK(output->has_columnwise_data(), "NVFP4 transposed output tensor must be allocated.");
     NVTE_CHECK(is_fp4_dtype(output->columnwise_data.dtype),
diff --git a/transformer_engine/common/comm_gemm_overlap/comm_gemm_overlap.cpp b/transformer_engine/common/comm_gemm_overlap/comm_gemm_overlap.cpp
@@ -184,7 +184,7 @@ TensorWrapper CommOverlapCore::get_tensor_chunk(const TensorWrapper &source, siz
                              &has_swizzled_scales, sizeof(has_swizzled_scales),
                              nullptr);
     NVTE_CHECK(has_swizzled_scales,
-               "Expected MFP8 tensor with scales in GEMM swizzled layout.");
+               "Expected MFP8 tensor with scales in GEMM swizzled format.");
   }
 
   // Tensor dimensions
diff --git a/transformer_engine/common/common.h b/transformer_engine/common/common.h
@@ -133,7 +133,7 @@ struct Tensor {
 
   NVTEScalingMode scaling_mode;
   NVTETensor nvte_tensor;
-  /*! Whether scaling factors are in layout expected by GEMM */
+  /*! Whether scaling factors are in format expected by GEMM */
   bool with_gemm_swizzled_scales = false;
 
   /*! Map from NVTETensorParam to parameter sizes */
diff --git a/transformer_engine/common/swizzle/swizzle.cu b/transformer_engine/common/swizzle/swizzle.cu
@@ -341,9 +341,9 @@ void swizzle_scaling_factors(const Tensor* input, Tensor* output, cudaStream_t s
   CheckInputTensor(*input, "scaling_factor_input");
   CheckInputTensor(*output, "scaling_factor_output");
   NVTE_CHECK(!input->with_gemm_swizzled_scales,
-             "Expected input tensor with scales in compact layout.");
+             "Expected input tensor with scales in compact format.");
   NVTE_CHECK(output->with_gemm_swizzled_scales,
-             "Expected output tensor with scales in swizzled layout for GEMM.");
+             "Expected output tensor with scales in GEMM swizzled format.");
   switch (scaling_mode) {
     case NVTE_MXFP8_1D_SCALING:
       NVTE_CHECK(is_fp8_dtype(input->dtype()), "Input tensor has invalid dtype (expected FP8, got ",
@@ -661,9 +661,9 @@ void multi_tensor_swizzle_scaling_factors(const std::vector<Tensor*>& input,
         (is_fp8 && is_mxfp8_scaling(scaling_mode)) || (is_fp4 && is_nvfp4_scaling(scaling_mode)),
         "Not implemented scaling mode " + to_string(scaling_mode) + ".");
     NVTE_CHECK(!input[i]->with_gemm_swizzled_scales,
-               "Expected input tensors with scales in compact layout.");
+               "Expected input tensors with scales in compact format.");
     NVTE_CHECK(output[i]->with_gemm_swizzled_scales,
-               "Expected output tensors with scales in swizzled layout for GEMM.");
+               "Expected output tensors with scales in GEMM swizzled format.");
 
     // We don't allow empty tensors. They should be filtered out before calling this function.
     NVTE_CHECK(input[i]->numel() != 0, "Tensor input[", i, "] is empty.");
diff --git a/transformer_engine/common/swizzle/swizzle_block_scaling.cu b/transformer_engine/common/swizzle/swizzle_block_scaling.cu
@@ -262,9 +262,9 @@ void swizzle_block_scaling_to_mxfp8_scaling_factors(const Tensor* input, Tensor*
              "Output must have E8M0 scaling factors");
 
   NVTE_CHECK(input->with_gemm_swizzled_scales,
-             "Expected input tensor with scales in swizzled layout for GEMM.");
+             "Expected input tensor with scales in GEMM swizzled format.");
   NVTE_CHECK(output->with_gemm_swizzled_scales,
-             "Expected output tensor with scales in swizzled layout for GEMM.");
+             "Expected output tensor with scales in GEMM swizzled format.");
 
   NVTE_CHECK(input->data.dptr != nullptr, "Input must have rowwise data");
   NVTE_CHECK(output->data.dptr == input->data.dptr, "Output must share data with input");
diff --git a/transformer_engine/pytorch/csrc/extensions/pybind.cpp b/transformer_engine/pytorch/csrc/extensions/pybind.cpp
@@ -291,7 +291,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("fused_multi_row_unpadding", &transformer_engine::pytorch::fused_multi_row_unpadding,
         "Fused Multi-tensor unpadding", py::call_guard<py::gil_scoped_release>());
   m.def("swizzle_scales_for_gemm_", &transformer_engine::pytorch::inplace_swizzle_scale_for_gemm,
-        "Convert tensor scaling factors into layout expeted by GEMM");
+        "Convert tensor block scales into GEMM swizzled format");
 
   // attention kernels
   m.def("fa_prepare_fwd", &transformer_engine::pytorch::fa_prepare_fwd,
diff --git a/transformer_engine/pytorch/csrc/util.h b/transformer_engine/pytorch/csrc/util.h
@@ -18,7 +18,7 @@
 namespace transformer_engine {
 namespace pytorch {
 
-/*! \brief Convert tensor scaling factors into a layout expected by GEMM.
+/*! \brief Convert tensor block scales into GEMM swizzled format.
  *
  *  The returned swizzled scales should be kept alive during the GEMM.
  */
@@ -27,7 +27,7 @@ std::tuple<std::optional<at::Tensor>, std::optional<at::Tensor>>
                             bool rowwise_usage,
                             bool columnwise_usage);
 
-/*! \brief Convert multiple tensor scaling factors into a layout expected by GEMM.
+/*! \brief Convert multiple tensor block scales into GEMM swizzled format.
  *
  *  The returned swizzled scales should be kept alive during the GEMMs.
  */
diff --git a/transformer_engine/pytorch/tensor/mxfp8_tensor.py b/transformer_engine/pytorch/tensor/mxfp8_tensor.py
@@ -193,7 +193,7 @@ def onnx_quantize(self, tensor: torch.Tensor) -> QuantizedTensor:
     def onnx_dequantize(self, tensor: Union[MXFP8TensorStorage, MXFP8Tensor]) -> torch.Tensor:
         if tensor._with_gemm_swizzled_scales:
             raise NotImplementedError(
-                "ONNX MXFP8 dequantization is only supported with scales in compact layout."
+                "ONNX MXFP8 dequantization is only supported with scales in compact format."
             )
         return torch.ops.tex.mxfp8_dequantize(tensor._rowwise_data, tensor._rowwise_scale_inv)
 
diff --git a/transformer_engine/pytorch/tensor/storage/mxfp8_tensor_storage.py b/transformer_engine/pytorch/tensor/storage/mxfp8_tensor_storage.py
@@ -70,7 +70,7 @@ class MXFP8TensorStorage(QuantizedTensorStorage):
     _quantizer: Optional[Quantizer]
     # FP8 data type
     _fp8_dtype: TE_DType
-    # Whether scaling factors are in the swizzled layout expected by
+    # Whether scaling factors are in the swizzled format expected by
     # GEMM
     _with_gemm_swizzled_scales: bool
 

Original file line number	Diff line number	Diff line change
`@@ -240,7 +240,7 @@ inline void dequantize(const Tensor &input, Tensor *output, cudaStream_t stream)`
`240`	`240`	`NVTE_CHECK(is_fp8_dtype(input.columnwise_data.dtype), "Input must have FP8 type.");`
`241`	`241`	`}`
`242`	`242`
`243`		`- NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact layout.");`
	`243`	`+ NVTE_CHECK(!input.with_gemm_swizzled_scales, "Input must have scales in compact format.");`
`244`	`244`	`NVTE_CHECK(!is_fp8_dtype(output->data.dtype), "Output must be in higher precision.");`
`245`	`245`	`NVTE_CHECK(output->data.shape == input.data.shape, "Input and output shapes need to match.");`
`246`	`246`
Original file line number	Diff line number	Diff line change
`@@ -686,7 +686,7 @@ void quantize_gated(const Tensor &gated_input, const Tensor &grad, Tensor *outpu`
`686`	`686`	`if (USE_COLWISE_SCALING) {`
`687`	`687`	`NVTE_CHECK(output->columnwise_scale_inv.dptr != nullptr, "Scaling tensor must be allocated.");`
`688`	`688`	`}`
`689`		`- NVTE_CHECK(!output->with_gemm_swizzled_scales, "Scaling tensor must be in compact layout.");`
	`689`	`+ NVTE_CHECK(!output->with_gemm_swizzled_scales, "Scaling tensor must be in compact format.");`
`690`	`690`
`691`	`691`	`ScalingType scaling_type;`
`692`	`692`	`if (USE_ROWWISE_SCALING && (!USE_COLWISE_SCALING)) {`
Original file line number	Diff line number	Diff line change
`@@ -551,7 +551,7 @@ void quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop,`
`551`	`551`	`NVTE_CHECK(input.has_data(), "Cannot quantize tensor without rowwise data.");`
`552`	`552`	`NVTE_CHECK(is_fp8_dtype(output->dtype()), "Output must have FP8 type.");`
`553`	`553`	`NVTE_CHECK(!output->with_gemm_swizzled_scales,`
`554`		`- "Output must have scales in compact layout.");`
	`554`	`+ "Output must have scales in compact format.");`
`555`	`555`	`if (use_rowwise_scaling) {`
`556`	`556`	`NVTE_CHECK(output->scale_inv.dptr != nullptr, "Scaling tensor must be allocated");`
`557`	`557`	`}`
Original file line number	Diff line number	Diff line change
`@@ -184,7 +184,7 @@ TensorWrapper CommOverlapCore::get_tensor_chunk(const TensorWrapper &source, siz`
`184`	`184`	`&has_swizzled_scales, sizeof(has_swizzled_scales),`
`185`	`185`	`nullptr);`
`186`	`186`	`NVTE_CHECK(has_swizzled_scales,`
`187`		`- "Expected MFP8 tensor with scales in GEMM swizzled layout.");`
	`187`	`+ "Expected MFP8 tensor with scales in GEMM swizzled format.");`
`188`	`188`	`}`
`189`	`189`
`190`	`190`	`// Tensor dimensions`