Skip to content

Commit d40689e

Browse files
authored
Merge branch 'main' into 96_awq_match_module_set
2 parents 451c113 + 4982529 commit d40689e

File tree

19 files changed

+55
-216
lines changed

19 files changed

+55
-216
lines changed

examples/awq/qwen3-vl-30b-a3b-Instruct-example.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,16 @@
33
from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
44

55
from llmcompressor import oneshot
6-
from llmcompressor.modeling import replace_modules_for_calibration
76
from llmcompressor.modifiers.awq import AWQModifier
87
from llmcompressor.utils import dispatch_for_generation
98

10-
# NOTE: Requires a minimum of transformers 4.57.0
11-
129
MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"
1310

1411
# Load model.
1512
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
1613
MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
1714
)
1815
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
19-
model = replace_modules_for_calibration(model)
2016

2117
DATASET_ID = "neuralmagic/calibration"
2218
NUM_CALIBRATION_SAMPLES = 256

examples/quantization_w4a4_fp4/qwen3_vl_moe_w4a4_fp4.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@
33
from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
44

55
from llmcompressor import oneshot
6-
from llmcompressor.modeling import replace_modules_for_calibration
76
from llmcompressor.modifiers.quantization import QuantizationModifier
87
from llmcompressor.utils import dispatch_for_generation
98

10-
# NOTE: Requires a minimum of transformers 4.57.0
11-
129
MODEL_ID = "Qwen/Qwen3-VL-235B-A22B-Instruct"
1310

1411

1512
# Load model.
1613
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
1714
processor = AutoProcessor.from_pretrained(MODEL_ID)
18-
model = replace_modules_for_calibration(model)
1915

2016
DATASET_ID = "neuralmagic/calibration"
2117
NUM_CALIBRATION_SAMPLES = 20

examples/quantization_w8a8_fp8/qwen3_vl_moe_fp8_example.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
22

33
from llmcompressor import oneshot
4-
from llmcompressor.modeling import replace_modules_for_calibration
54
from llmcompressor.modifiers.quantization import QuantizationModifier
65

76
# NOTE: Requires a minimum of transformers 4.57.0
@@ -11,7 +10,6 @@
1110
# Load model.
1211
model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
1312
processor = AutoProcessor.from_pretrained(MODEL_ID)
14-
model = replace_modules_for_calibration(model)
1513

1614
# Configure the quantization algorithm and scheme.
1715
# In this case, we:

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -201,22 +201,24 @@ def apply_recipe_modifiers(
201201
session.reset()
202202

203203
# (Helen INFERENG-661): validate recipe modifiers before initialization
204-
session.initialize(
205-
model=self.model,
206-
start=-1,
207-
recipe=self.recipe,
208-
recipe_stage=recipe_stage,
209-
recipe_args=self.recipe_args.recipe_args,
210-
calib_data=calibration_dataloader,
211-
)
212-
user_pipeline = self.dataset_args.pipeline
213-
modifiers = session.lifecycle.recipe.modifiers
214-
pipeline = CalibrationPipeline.from_modifiers(modifiers, user=user_pipeline)
215204
# Apply MoE calibration context for the entire calibration process
216205
with moe_calibration_context(
217206
self.model,
218207
calibrate_all_experts=self.dataset_args.moe_calibrate_all_experts,
219208
):
209+
session.initialize(
210+
model=self.model,
211+
start=-1,
212+
recipe=self.recipe,
213+
recipe_stage=recipe_stage,
214+
recipe_args=self.recipe_args.recipe_args,
215+
calib_data=calibration_dataloader,
216+
)
217+
user_pipeline = self.dataset_args.pipeline
218+
pipeline = CalibrationPipeline.from_modifiers(
219+
session.lifecycle.recipe.modifiers, user=user_pipeline
220+
)
221+
220222
pipeline(
221223
self.model,
222224
calibration_dataloader,

src/llmcompressor/modeling/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,3 @@
1818
# TODO: add granite4, Qwen3Next
1919

2020
from .fuse import *
21-
from .prepare import *

src/llmcompressor/modeling/deepseek_v3.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -68,20 +68,3 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
6868
hidden_states = final_hidden_states.type(hidden_states.dtype).view(*orig_shape)
6969
hidden_states = hidden_states + self.shared_experts(residuals)
7070
return hidden_states
71-
72-
73-
# Legacy function for backward compatibility
74-
def replace(
75-
config: DeepseekV3Config,
76-
module: OriginalDeepseekV3MoE,
77-
calibrate_all_experts: bool,
78-
):
79-
"""
80-
Legacy replacement function.
81-
Use CalibrationDeepseekV3MoE instead.
82-
"""
83-
return CalibrationDeepseekV3MoE(
84-
module,
85-
config,
86-
calibrate_all_experts=calibrate_all_experts,
87-
)

src/llmcompressor/modeling/llama4.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,3 @@ def __init__(self, config: Llama4TextConfig, original: Llama4TextExperts):
8787
self[i].gate_proj.weight.data = gate_proj.t().contiguous()
8888
self[i].up_proj.weight.data = up_proj.t().contiguous()
8989
self[i].down_proj.weight.data = down.t().contiguous()
90-
91-
92-
# Legacy function for backward compatibility
93-
def replace(config: Llama4Config, module: Llama4TextMoe, calibrate_all_experts: bool):
94-
"""
95-
Legacy replacement function.
96-
Use SequentialLlama4TextMoe instead.
97-
"""
98-
return SequentialLlama4TextMoe(
99-
module,
100-
config,
101-
calibrate_all_experts=calibrate_all_experts,
102-
)

src/llmcompressor/modeling/prepare.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

src/llmcompressor/modeling/qwen3_moe.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,3 @@ def forward(self, hidden_states: torch.Tensor):
9797

9898
def restore(self, original: torch.nn.Module) -> torch.nn.Module:
9999
return original
100-
101-
102-
# Legacy function for backward compatibility
103-
def replace(
104-
config: Qwen3MoeConfig,
105-
module: OriginalQwen3MoeSparseMoeBlock,
106-
calibrate_all_experts: bool,
107-
):
108-
"""
109-
Legacy replacement function.
110-
Use CalibrationQwen3MoeSparseMoeBlock instead.
111-
"""
112-
return CalibrationQwen3MoeSparseMoeBlock(
113-
module,
114-
config,
115-
calibrate_all_experts=calibrate_all_experts,
116-
)

src/llmcompressor/modeling/qwen3_next_moe.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,3 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
123123

124124
def restore(self, original: torch.nn.Module) -> torch.nn.Module:
125125
return original
126-
127-
128-
def replace(
129-
config,
130-
module,
131-
calibrate_all_experts,
132-
):
133-
return CalibrationQwen3NextSparseMoeBlock(
134-
config=config, original=module, calibrate_all_experts=calibrate_all_experts
135-
)

0 commit comments

Comments (0)