Skip to content

Commit

Permalink
Change inline weights to Neff default value to True (#590)
Browse files Browse the repository at this point in the history
* inline to True

* precise inline or not in test
  • Loading branch information
JingyaHuang authored May 7, 2024
1 parent 9361b55 commit 7e21931
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 8 deletions.
2 changes: 1 addition & 1 deletion optimum/exporters/neuron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def main_export(
cache_dir: Optional[str] = None,
disable_neuron_cache: Optional[bool] = False,
compiler_workdir: Optional[Union[str, Path]] = None,
inline_weights_to_neff: bool = False,
inline_weights_to_neff: bool = True,
optlevel: str = "2",
trust_remote_code: bool = False,
subfolder: str = "",
Expand Down
8 changes: 4 additions & 4 deletions optimum/exporters/neuron/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def export_neuronx(
config: "NeuronDefaultConfig",
output: Path,
compiler_workdir: Optional[Path] = None,
inline_weights_to_neff: bool = False,
inline_weights_to_neff: bool = True,
optlevel: str = "2",
auto_cast: Optional[str] = None,
auto_cast_type: str = "bf16",
Expand All @@ -482,7 +482,7 @@ def export_neuronx(
Directory to store the exported Neuron model.
compiler_workdir (`Optional[Path]`, defaults to `None`):
The directory used by neuronx-cc, where you can find intermediary outputs (neff, weight, hlo...).
inline_weights_to_neff (`bool`, defaults to `False`):
inline_weights_to_neff (`bool`, defaults to `True`):
Whether to inline the weights to the neff graph. If set to False, weights will be separated from the neff.
optlevel (`str`, defaults to `"2"`):
The level of optimization the compiler should perform. Can be `"1"`, `"2"` or `"3"`, defaults to "2".
Expand Down Expand Up @@ -610,7 +610,7 @@ def export_neuron(
config: "NeuronDefaultConfig",
output: Path,
compiler_workdir: Optional[Path] = None,
inline_weights_to_neff: bool = False,
inline_weights_to_neff: bool = True,
auto_cast: Optional[str] = None,
auto_cast_type: str = "bf16",
disable_fast_relayout: bool = False,
Expand All @@ -628,7 +628,7 @@ def export_neuron(
Directory to store the exported Neuron model.
compiler_workdir (`Optional[Path]`, defaults to `None`):
The directory used by neuron-cc, where you can find intermediary outputs (neff, weight, hlo...).
inline_weights_to_neff (`bool`, defaults to `False`):
inline_weights_to_neff (`bool`, defaults to `True`):
Whether to inline the weights to the neff graph. If set to False, weights will be separated from the neff.
auto_cast (`Optional[str]`, defaults to `None`):
Whether to cast operations from FP32 to lower precision to speed up the inference. Can be `None`, `"matmul"` or `"all"`, you should use `None` to disable any auto-casting, use `"matmul"` to cast FP32 matrix multiplication operations, and use `"all"` to cast all FP32 operations.
Expand Down
2 changes: 1 addition & 1 deletion optimum/neuron/modeling_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def _export(
cache_dir: Optional[str] = None,
compiler_workdir: Optional[Union[str, Path]] = None,
disable_neuron_cache: bool = False,
inline_weights_to_neff: bool = False,
inline_weights_to_neff: bool = True,
optlevel: str = "2",
subfolder: str = "",
local_files_only: bool = False,
Expand Down
4 changes: 2 additions & 2 deletions optimum/neuron/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def _export(
cache_dir: Optional[str] = None,
compiler_workdir: Optional[str] = None,
disable_neuron_cache: bool = False,
inline_weights_to_neff: bool = False,
inline_weights_to_neff: bool = True,
optlevel: str = "2",
subfolder: str = "",
local_files_only: bool = False,
Expand Down Expand Up @@ -623,7 +623,7 @@ def _export(
Path to a directory in which the neuron compiler will store all intermediary files during the compilation (neff, weight, hlo graph...).
disable_neuron_cache (`bool`, defaults to `False`):
Whether to disable automatic caching of compiled models. If set to True, will not load neuron cache nor cache the compiled artifacts.
inline_weights_to_neff (`bool`, defaults to `False`):
inline_weights_to_neff (`bool`, defaults to `True`):
Whether to inline the weights to the neff graph. If set to False, weights will be separated from the neff.
optlevel (`str`, defaults to `"2"`):
The level of optimization the compiler should perform. Can be `"1"`, `"2"` or `"3"`, defaults to "2".
Expand Down
3 changes: 3 additions & 0 deletions tests/cache/test_neuronx_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def export_encoder_model(model_id):
dynamic_batch_size=False,
batch_size=batch_size,
sequence_length=sequence_length,
inline_weights_to_neff=False,
)


Expand All @@ -107,6 +108,7 @@ def export_stable_diffusion_model(model_id):
height=height,
width=width,
num_images_per_prompt=num_images_per_prompt,
inline_weights_to_neff=False,
)


Expand All @@ -122,6 +124,7 @@ def export_stable_diffusion_xl_model(model_id):
height=height,
width=width,
num_images_per_prompt=num_images_per_prompt,
inline_weights_to_neff=False,
)


Expand Down

0 comments on commit 7e21931

Please sign in to comment.