Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .azure-pipelines/scripts/ut/run_ut_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ function run_unit_test() {
CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off" uv pip install llama-cpp-python
uv pip install 'git+https://github.com/ggml-org/llama.cpp.git#subdirectory=gguf-py'
uv pip install -r requirements.txt
uv pip install -r requirements_diffusion.txt

uv pip list
export COVERAGE_RCFILE=${REPO_PATH}/.azure-pipelines/scripts/ut/.coverage
Expand Down
2 changes: 1 addition & 1 deletion auto_round/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from auto_round.autoround import AutoRound

# support for old api
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam, AutoRoundDiffusion
from auto_round.utils import LazyImport


Expand Down
83 changes: 81 additions & 2 deletions auto_round/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,25 @@ def __init__(self, *args, **kwargs):
help="the template for building training dataset. It can be a custom one.",
)

## ===================== diffusion model ==================
self.add_argument(
"--guidance_scale",
default=7.5,
type=float,
)

self.add_argument(
"--num_inference_steps",
default=50,
type=int,
)

self.add_argument(
"--generator_seed",
default=None,
type=int,
)

## ======================= eval =======================
self.add_argument(
"--tasks",
Expand Down Expand Up @@ -258,6 +277,22 @@ def __init__(self, *args, **kwargs):
"--eval_model_dtype", default=None, type=str, help="the torch_dytpe to load the model for evaluation."
)

## ======================= diffusion model eval =======================
self.add_argument("--prompt_file", default=None, type=str, help="the prompt file to load prmpt.")

self.add_argument("--prompt", default=None, type=str, help="the prompt for test.")

self.add_argument(
"--metrics",
"--metric",
default="clip",
help="support clip, clip-iqa, imagereward",
)

self.add_argument(
"--image_save_dir", default="./tmp_image_save", type=str, help="path to save generated images"
)


def setup_parser():
parser = BasicArgumentParser()
Expand Down Expand Up @@ -427,6 +462,7 @@ def tune(args):
)

from auto_round.compressors import (
DiffusionExtraConfig,
ExtraConfig,
MLLMExtraConfig,
SchemeExtraConfig,
Expand Down Expand Up @@ -466,9 +502,15 @@ def tune(args):
mllm_config = MLLMExtraConfig(
quant_nontext_module=args.quant_nontext_module, extra_data_dir=args.extra_data_dir, template=args.template
)
diffusion_config = DiffusionExtraConfig(
guidance_scale=args.guidance_scale,
num_inference_steps=args.num_inference_steps,
generator_seed=args.generator_seed,
)
extra_config.tuning_config = tuning_config
extra_config.scheme_config = scheme_config
extra_config.mllm_config = mllm_config
extra_config.diffusion_config = diffusion_config

autoround: BaseCompressor = AutoRound(
model=model_name,
Expand Down Expand Up @@ -524,6 +566,45 @@ def tune(args):
model.eval()
clear_memory()

eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")

# diffusion model has different evaluation path
if getattr(autoround, "diffusion", False):
pipe = autoround.pipe
pipe.to(model.dtype)
pipe.transformer = model
device_str = detect_device(device_str)
pipe = pipe.to(device_str)
if pipe.dtype != eval_model_dtype and eval_model_dtype != "auto":
pipe.to(getattr(torch, eval_model_dtype))

gen_kwargs = {
"guidance_scale": args.guidance_scale,
"output_type": "pil",
"num_inference_steps": args.num_inference_steps,
"generator": (
None
if args.generator_seed is None
else torch.Generator(device=pipe.device).manual_seed(args.generator_seed)
),
}
if not os.path.exists(args.image_save_dir):
os.makedirs(args.image_save_dir)

if args.prompt is not None:
outputs = pipe(prompt=args.prompt, **gen_kwargs)
outputs.images[0].save(os.path.join(args.image_save_dir, "img.png"))
logger.info(
f"Image generated with prompt {args.prompt} is saved as {os.path.join(args.image_save_dir, 'img.png')}"
)

if args.prompt_file is not None:
from auto_round.compressors.diffusion import diffusion_eval

metrics = args.metrics.split(",")
diffusion_eval(pipe, args.prompt_file, metrics, args.image_save_dir, 1, gen_kwargs)
return

lm_eval_version = get_library_version("lm-eval")

eval_folder = folders[-1]
Expand All @@ -545,8 +626,6 @@ def tune(args):

import time

eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")

if (autoround.act_bits <= 8 and formats[-1] == "fake") or eval_gguf_model:
if eval_gguf_model:
# for file in os.listdir(eval_folder):
Expand Down
93 changes: 91 additions & 2 deletions auto_round/autoround.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
from auto_round.compressors import (
AdamCompressor,
BaseCompressor,
DiffusionCompressor,
ExtraConfig,
LLMCompressor,
MLLMCompressor,
)
from auto_round.logger import deprecated, logger
from auto_round.schemes import QuantizationScheme
from auto_round.utils import is_mllm_model
from auto_round.utils import is_diffusion_model, is_mllm_model


class AutoRound:
Expand Down Expand Up @@ -77,7 +78,7 @@ def __new__(
seed: int = 42,
# for adam
enable_adam: bool = False,
# for MLLM
# for MLLM and Diffusion
extra_config: ExtraConfig = None,
**kwargs,
) -> BaseCompressor:
Expand Down Expand Up @@ -145,9 +146,17 @@ def __new__(
if (extra_config and not extra_config.mllm_config.is_default()) or is_mllm_model(model):
logger.info("using MLLM mode for multimodal model.")
model_cls.append(MLLMCompressor)
if extra_config:
extra_config.diffusion_config = None
elif (extra_config and not extra_config.diffusion_config.is_default()) or is_diffusion_model(model):
logger.info("using Diffusion mode for diffusion model.")
model_cls.append(DiffusionCompressor)
if extra_config:
extra_config.mllm_config = None
else:
if extra_config:
extra_config.mllm_config = None
extra_config.diffusion_config = None
model_cls.append(LLMCompressor)

if enable_adam:
Expand Down Expand Up @@ -540,3 +549,83 @@ def __init__(
seed=seed,
**kwargs,
)


@deprecated("AutoRound")
class AutoRoundDiffusion(DiffusionCompressor):
    """Old-API wrapper for rounding-based quantization of diffusion models.

    The class is decorated with ``@deprecated("AutoRound")``; its constructor only
    gathers its arguments and hands them to ``DiffusionCompressor.__init__``.

    Args:
        model: The PyTorch model (or a model name/path string) to quantize.
        tokenizer: Accepted for signature compatibility only. Diffusion models do not
            use it, and ``None`` is always forwarded to the parent class.
        guidance_scale (float): How strongly image generation follows the text prompt;
            larger values follow the prompt more closely (default is 7.5).
        num_inference_steps (int): Reference number of denoising steps (default is 50).
        generator_seed (int): Seed controlling the initial noise from which an image is
            generated (default is None).
        scheme (str | dict | QuantizationScheme): Preset scheme defining the quantization
            configuration (default is "W4A16").
        layer_config (dict): Per-layer weight quantization configuration (default is None).
        dataset: Path or name of the calibration dataset (default is "coco2014").
        iters (int): Number of iterations (default is 200).
        seqlen (int): Sequence length (default is 2048).
        nsamples (int): Number of samples (default is 128).
        batch_size (int): Training batch size (default is 8).
        gradient_accumulate_steps (int): Gradient accumulation steps (default is 1).
        low_gpu_mem_usage (bool): Whether to use low GPU memory (default is False).
        device_map (str | dict | int | torch.device, optional): Device placement map
            (default is 0).
        enable_torch_compile (bool): Whether to enable torch compile to optimize
            quant_block/layer (default is False).
        seed (int): Random seed forwarded to the parent class (default is 42).
        **kwargs: Additional keyword arguments passed through unchanged.
    """

    # Weight quantization attributes (annotations only, no class-level values).
    bits: int | None
    group_size: int | None
    sym: bool | None
    data_type: str | None
    # Activation quantization attributes.
    act_bits: int | None
    act_group_size: int | None
    act_sym: bool | None
    act_data_type: str | None
    act_dynamic: bool | None
    # Super-group attributes.
    super_bits: int | None
    super_group_size: int | None

    def __init__(
        self,
        model: Union[object, str],
        tokenizer=None,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 50,
        generator_seed: int = None,
        scheme: Union[str, dict, QuantizationScheme] = "W4A16",
        layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
        dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014",
        iters: int = 200,
        seqlen: int = 2048,
        nsamples: int = 128,
        batch_size: int = 8,
        gradient_accumulate_steps: int = 1,
        low_gpu_mem_usage: bool = False,
        device_map: Union[str, torch.device, int, dict] = 0,
        enable_torch_compile: bool = False,
        seed: int = 42,
        **kwargs,
    ):
        # Collect everything the parent constructor receives, in the same order the
        # keywords were originally spelled out.
        forwarded = {
            "model": model,
            # The caller-supplied tokenizer is intentionally dropped: the parent
            # always receives None for diffusion models.
            "tokenizer": None,
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
            "generator_seed": generator_seed,
            "scheme": scheme,
            "layer_config": layer_config,
            "dataset": dataset,
            "iters": iters,
            "seqlen": seqlen,
            "nsamples": nsamples,
            "batch_size": batch_size,
            "gradient_accumulate_steps": gradient_accumulate_steps,
            "low_gpu_mem_usage": low_gpu_mem_usage,
            "device_map": device_map,
            "enable_torch_compile": enable_torch_compile,
            "seed": seed,
        }
        super().__init__(**forwarded, **kwargs)
2 changes: 2 additions & 0 deletions auto_round/compressors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

from auto_round.compressors.base import *
from auto_round.compressors.mllm.compressor import MLLMCompressor
from auto_round.compressors.diffusion.compressor import DiffusionCompressor
from auto_round.compressors.config import (
DiffusionExtraConfig,
ExtraConfig,
MLLMExtraConfig,
SchemeExtraConfig,
Expand Down
Loading