From fc3ac8075b081defebe49ef3610c6210d9b5107b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Fri, 7 Feb 2025 20:13:50 +0000 Subject: [PATCH] Remove outdated TODO --- server/text_generation_server/layers/fp8.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/text_generation_server/layers/fp8.py b/server/text_generation_server/layers/fp8.py index fe138a4af72..04689ed9233 100644 --- a/server/text_generation_server/layers/fp8.py +++ b/server/text_generation_server/layers/fp8.py @@ -23,7 +23,6 @@ quantization = None try: - # TODO: needs to be ported over to MoE and used on CUDA. from moe_kernels.fp8_utils import w8a8_block_fp8_matmul, per_token_group_quant_fp8 except ImportError: w8a8_block_fp8_matmul = None