Skip to content

Commit 113e330

Browse files
authored
fix bf16 and add comments (#4106)
1 parent 69aa278 commit 113e330

File tree

4 files changed

+8
-4
lines changed

4 files changed

+8
-4
lines changed

fastdeploy/model_executor/layers/linear.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -498,6 +498,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
498498
if weight_need_transpose:
499499
loaded_weight = get_tensor(loaded_weight)
500500
loaded_weight = loaded_weight.transpose([1, 0])
501+
# Avoid redundant transpose of fused weights when weight_loader is called iteratively
501502
param.weight_need_transpose = False
502503
# Loaded weight is already fused on disk.
503504
shard_offsets = [
@@ -638,6 +639,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
638639
if weight_need_transpose:
639640
loaded_weight = get_tensor(loaded_weight)
640641
loaded_weight = loaded_weight.transpose([1, 0])
642+
# Avoid redundant transpose of fused weights when weight_loader is called iteratively
641643
param.weight_need_transpose = False
642644
# Loaded weight is already fused on disk
643645
shard_offsets = [

fastdeploy/model_executor/layers/quantization/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
"""
1717
from typing import Dict, List, Type
1818

19+
from fastdeploy.utils import parse_quantization
20+
1921
from .quant_base import QuantConfigBase
2022

2123
QUANTIZATION_METHODS: List[str] = [
@@ -35,6 +37,8 @@
3537

3638

3739
def parse_quant_config(args, model_config, is_ernie, is_v1_loader):
40+
if args.quantization is not None and isinstance(args.quantization, str):
41+
args.quantization = parse_quantization(args.quantization)
3842
# 1.model_config.is_quantized
3943
# TODO(bukejiyu) model_config.is_quantized is v0 only need to be removed in future
4044
if model_config.model_format == "torch":

fastdeploy/model_executor/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
240240
}
241241
moe_quant_type = ""
242242
dense_quant_type = ""
243-
if fd_config.quant_config is None:
243+
if fd_config.quant_config is not None:
244244
if fd_config.quant_config.name() == "mix_quant":
245245
moe_quant_type = fd_config.quant_config.moe_quant_type
246246
dense_quant_type = fd_config.quant_config.dense_quant_type

fastdeploy/worker/worker_process.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
from fastdeploy.inter_communicator import IPCSignal
4545
from fastdeploy.model_executor.layers.quantization import parse_quant_config
4646
from fastdeploy.platforms import current_platform
47-
from fastdeploy.utils import get_logger, parse_quantization
47+
from fastdeploy.utils import get_logger
4848
from fastdeploy.worker.worker_base import WorkerBase
4949

5050
logger = get_logger("worker_process", "worker_process.log")
@@ -655,8 +655,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
655655
FDConfig: Initialized FastDeploy configuration object
656656
"""
657657
# RL rollout
658-
if args.quantization is not None and isinstance(args.quantization, str):
659-
args.quantization = parse_quantization(args.quantization)
660658
paddle.set_default_dtype(args.dtype)
661659
model_config = ModelConfig(vars(args))
662660
device_config = DeviceConfig(vars(args))

0 commit comments

Comments
 (0)