diff --git a/scripts/generate_tiny_models.py b/scripts/generate_tiny_models.py deleted file mode 100644 index 73b7d166949..00000000000 --- a/scripts/generate_tiny_models.py +++ /dev/null @@ -1,450 +0,0 @@ -# Copyright 2020-2026 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the -# `trl-internal-testing` organization. -# This script is meant to be run when adding new tiny model to the TRL library. 
- -import torch -from huggingface_hub import HfApi, ModelCard -from peft import LoraConfig, get_peft_model -from torch import nn -from transformers import ( - AutoConfig, - AutoProcessor, - AutoTokenizer, - BartModel, - Cohere2Config, - Cohere2ForCausalLM, - CohereConfig, - CohereForCausalLM, - DeepseekV3Config, - DeepseekV3ForCausalLM, - FalconMambaConfig, - FalconMambaForCausalLM, - Gemma2Config, - Gemma2ForCausalLM, - Gemma3ForConditionalGeneration, - Gemma4ForConditionalGeneration, - GemmaConfig, - GemmaForCausalLM, - GenerationConfig, - Glm4MoeConfig, - Glm4MoeForCausalLM, - GPT2Config, - GPT2LMHeadModel, - GPTNeoXConfig, - GPTNeoXForCausalLM, - GPTNeoXForSequenceClassification, - GptOssConfig, - GptOssForCausalLM, - Idefics2Config, - Idefics2ForConditionalGeneration, - Idefics3ForConditionalGeneration, - InternVLForConditionalGeneration, - LlamaConfig, - LlamaForCausalLM, - LlamaForSequenceClassification, - LlavaForConditionalGeneration, - LlavaNextForConditionalGeneration, - MistralConfig, - MistralForCausalLM, - OPTConfig, - OPTForCausalLM, - PaliGemmaForConditionalGeneration, - Phi3Config, - Phi3ForCausalLM, - Qwen2_5_VLConfig, - Qwen2_5_VLForConditionalGeneration, - Qwen2Config, - Qwen2ForCausalLM, - Qwen2ForSequenceClassification, - Qwen2VLConfig, - Qwen2VLForConditionalGeneration, - Qwen3_5Config, - Qwen3_5ForConditionalGeneration, - Qwen3_5MoeConfig, - Qwen3_5MoeForConditionalGeneration, - Qwen3Config, - Qwen3ForCausalLM, - Qwen3ForSequenceClassification, - Qwen3MoeConfig, - Qwen3MoeForCausalLM, - Qwen3MoeForSequenceClassification, - Qwen3VLConfig, - Qwen3VLForConditionalGeneration, - SmolVLMForConditionalGeneration, - T5ForConditionalGeneration, -) - - -ORGANIZATION = "trl-internal-testing" - -MODEL_CARD = """ ---- -library_name: transformers -tags: [trl] ---- - -# Tiny {model_class_name} - -This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library. 
-""" - - -api = HfApi() - - -def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, force=False): - model_class_name = model.__class__.__name__ - content = MODEL_CARD.format(model_class_name=model_class_name) - model_card = ModelCard(content) - if prefix is not None: - model_class_name = f"{prefix}-{model_class_name}" - repo_id = f"{ORGANIZATION}/{model_class_name}" - if suffix is not None: - repo_id += f"-{suffix}" - - if api.repo_exists(repo_id) and not force: - print(f"Model {repo_id} already exists, skipping") - else: - model.push_to_hub(repo_id) - model_card.push_to_hub(repo_id) - if tokenizer is not None: - tokenizer.push_to_hub(repo_id) - if generation_config is not None: - generation_config.push_to_hub(repo_id) - - -def init_weights_tiny_model(model): - """ - Initialize tiny test models to avoid NaNs from uninitialized weights. - - Uses safe defaults: - - Linear/Conv1d: Xavier uniform (weights), zero (biases) - - Embedding: Normal(0, 0.02) - - LayerNorm: Ones (weights), zero (biases) - - Args: - model: PyTorch model (modified in-place) - """ - for module in model.modules(): - if isinstance(module, nn.Linear): - # Attention/MLP projections → Xavier or Normal - if module.bias is not None: - nn.init.zeros_(module.bias) - nn.init.xavier_uniform_(module.weight) - - elif isinstance(module, nn.Embedding): - # Token embeddings → GPT-style Normal - nn.init.normal_(module.weight, mean=0.0, std=0.02) - - elif isinstance(module, nn.LayerNorm): - # LayerNorm weights always 1, bias 0 - nn.init.ones_(module.weight) - if module.bias is not None: - nn.init.zeros_(module.bias) - - elif isinstance(module, nn.Conv1d): - # Convolutional layers → Xavier or Normal - if module.bias is not None: - nn.init.zeros_(module.bias) - nn.init.xavier_uniform_(module.weight) - - -# Decoder models -for model_id, config_class, model_class, dtype, suffix in [ - # ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None), # loading fails with this model, see 
https://huggingface.co/bigscience/bloomz-560m/discussions/14 - ("CohereLabs/aya-expanse-8b", CohereConfig, CohereForCausalLM, torch.float16, None), - ("CohereLabs/tiny-aya-earth", Cohere2Config, Cohere2ForCausalLM, torch.bfloat16, None), - ("deepseek-ai/DeepSeek-R1", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, None), - # It's important to have R1-0528 as it doesn't have the same chat template - ("deepseek-ai/DeepSeek-R1-0528", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, "0528"), - ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, torch.bfloat16, None), - ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, torch.bfloat16, None), - ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, torch.bfloat16, None), - ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, torch.float32, None), - ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, torch.float16, None), - ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3"), - ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.1"), - ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.2"), - ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.1"), - ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.2"), - ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, torch.float16, None), - ("microsoft/Phi-3-mini-4k-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3"), - ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3.5"), - ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5"), - ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5-Coder"), - ("Qwen/Qwen3-8B", Qwen3Config, Qwen3ForCausalLM, torch.bfloat16, None), - # It's important to have Qwen3-4B-Instruct-2507 as it doesn't have the 
same chat template (non-thinking variant) - ("Qwen/Qwen3-4B-Instruct-2507", Qwen3Config, Qwen3ForCausalLM, torch.bfloat16, "Instruct-2507"), -]: - revision = "refs/pr/14" if model_id == "Qwen/Qwen3-8B" else "main" # chat template with {% generation %} - tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision) - generation_config = GenerationConfig.from_pretrained(model_id, revision=revision) - config = config_class( - vocab_size=len(tokenizer.vocab), - hidden_size=8, - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, - ) - model = model_class(config).to(dtype=dtype) - init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# MoE models -for model_id, config_class, model_class, dtype, suffix in [ - ("Qwen/Qwen3-30B-A3B", Qwen3MoeConfig, Qwen3MoeForCausalLM, torch.bfloat16, None), - ("openai/gpt-oss-20b", GptOssConfig, GptOssForCausalLM, torch.bfloat16, None), - ("zai-org/GLM-4.5", Glm4MoeConfig, Glm4MoeForCausalLM, torch.bfloat16, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = {} - if model_id == "zai-org/GLM-4.5": - kwargs["n_routed_experts"] = 4 - elif model_id == "Qwen/Qwen3-30B-A3B": - kwargs["num_experts"] = 4 - elif model_id == "openai/gpt-oss-20b": - kwargs["num_local_experts"] = 4 - - config = config_class( - vocab_size=len(tokenizer.vocab), - hidden_size=8, - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, - num_experts_per_tok=2, - **kwargs, - ) - model = model_class(config).to(dtype=dtype) - init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# Two slightly bigger models, required for vLLM testing -tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-32B-Instruct") -generation_config = GenerationConfig.from_pretrained("Qwen/Qwen2.5-32B-Instruct") -config = 
Qwen2Config( - vocab_size=len(tokenizer.vocab), - hidden_size=128, # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, -) -model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16) -push_to_hub(model, tokenizer, generation_config, "small", "2.5") - -tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B") -generation_config = GenerationConfig.from_pretrained("Qwen/Qwen3-4B") -config = Qwen3Config( - vocab_size=len(tokenizer.vocab), - hidden_size=128, # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, -) -model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16) -push_to_hub(model, tokenizer, generation_config, "small") - -# Reward models -for model_id, model_class, dtype, suffix in [ - ("EleutherAI/pythia-14m", GPTNeoXForSequenceClassification, torch.bfloat16, None), - ("meta-llama/Llama-3.2-1B-Instruct", LlamaForSequenceClassification, torch.bfloat16, "3.2"), - ("Qwen/Qwen2.5-32B-Instruct", Qwen2ForSequenceClassification, torch.bfloat16, "2.5"), - ("Qwen/Qwen3-4B", Qwen3ForSequenceClassification, torch.bfloat16, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = { - "num_labels": 1, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "num_hidden_layers": 2, - "intermediate_size": 32, - } - config = AutoConfig.from_pretrained(model_id, **kwargs) - # Bug in transformers: it ignores num_hidden_layers to build layer_types - if model_id in ("Qwen/Qwen2.5-32B-Instruct", "Qwen/Qwen3-4B"): - config.layer_types = config.layer_types[:2] - model = model_class(config).to(dtype=dtype) - init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# MoE 
Reward models -for model_id, model_class, dtype, suffix in [ - ("Qwen/Qwen3-30B-A3B", Qwen3MoeForSequenceClassification, torch.bfloat16, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = { - "num_labels": 1, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "num_hidden_layers": 2, - "intermediate_size": 32, - "num_experts": 4, - "num_experts_per_tok": 2, - } - config = AutoConfig.from_pretrained(model_id, **kwargs) - model = model_class(config).to(dtype=dtype) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - - -# Encoder-decoder models -for model_id, model_class, dtype, suffix in [ - ("facebook/bart-base", BartModel, torch.float32, None), - ("google/flan-t5-small", T5ForConditionalGeneration, torch.float32, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "facebook/bart-base" else None - config = AutoConfig.from_pretrained(model_id) - config.d_model = 24 - model = model_class(config).to(dtype=dtype) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - - -# Vision Language Models -for model_id, model_class, dtype in [ - ("google/gemma-3-4b-it", Gemma3ForConditionalGeneration, torch.bfloat16), - ("google/gemma-4-E2B-it", Gemma4ForConditionalGeneration, torch.bfloat16), - ("google/paligemma-3b-pt-224", PaliGemmaForConditionalGeneration, torch.float32), - ("HuggingFaceM4/idefics2-8b", Idefics2ForConditionalGeneration, torch.float32), - ("HuggingFaceM4/Idefics3-8B-Llama3", Idefics3ForConditionalGeneration, torch.bfloat16), - ("HuggingFaceTB/SmolVLM2-2.2B-Instruct", SmolVLMForConditionalGeneration, torch.float32), - ("llava-hf/llava-1.5-7b-hf", LlavaForConditionalGeneration, torch.float16), - # Original model dtype is float16, but it triggers CUDA device side assert error (see GH-4741): - 
("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextForConditionalGeneration, torch.bfloat16), - ("OpenGVLab/InternVL3-8B-hf", InternVLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen2-VL-2B-Instruct", Qwen2VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen2.5-VL-3B-Instruct", Qwen2_5_VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen3-VL-2B-Instruct", Qwen3VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen3.5-0.8B", Qwen3_5ForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen3.6-35B-A3B", Qwen3_5MoeForConditionalGeneration, torch.bfloat16), -]: - processor = AutoProcessor.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "Qwen/Qwen3.5-0.8B" else None - - text_config = { - "num_hidden_layers": 2, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "layer_types": None, # Set it automatically from num_hidden_layers - } - vision_config = { - "num_hidden_layers": 2, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "embed_dim": 64, - } - kwargs = {} - - if issubclass(model_class.config_class, (Qwen2VLConfig, Qwen2_5_VLConfig)): - text_config["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"} - vision_config["depth"] = 2 - # Different dict object from text_config; see GH-4101 and transformers#41020 - kwargs["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"} - - if issubclass(model_class.config_class, Qwen2_5_VLConfig): - vision_config["out_hidden_size"] = 16 - # Different dict object at the config root; see GH-4101 and transformers#41020 - kwargs["num_hidden_layers"] = 2 - kwargs["hidden_size"] = 16 - kwargs["num_attention_heads"] = 4 - - if issubclass(model_class.config_class, Idefics2Config): - kwargs["perceiver_config"] = {"hidden_size": 16} - - if issubclass(model_class.config_class, Qwen3VLConfig): - # So hasattr(config, "layer_types") is False - # See: 
https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L420 - del text_config["layer_types"] - # "mrope_section" needs 3 elements: for dim, offset in enumerate((1, 2), start=1): mrope_section[dim] - # See: https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L361 - text_config["rope_scaling"] = {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"} - vision_config["depth"] = 2 - vision_config["out_hidden_size"] = 16 - - if issubclass(model_class.config_class, (Qwen3_5Config, Qwen3_5MoeConfig)): - # For tiny layer counts, default `layer_types` can end up with no full-attention layers (e.g. 2 layers and - # default interval 4), which breaks Qwen3.5 dynamic cache logic. Keep one full-attention layer at the end. - text_config["layer_types"] = ["linear_attention", "full_attention"] - text_config["full_attention_interval"] = 2 - # Qwen3.5-VL vision config expects `depth`/`num_heads`, not `num_hidden_layers`/`num_attention_heads`. - vision_config.pop("num_hidden_layers", None) - vision_config.pop("num_attention_heads", None) - vision_config.pop("num_key_value_heads", None) - vision_config.pop("embed_dim", None) - vision_config["depth"] = 2 - vision_config["num_heads"] = 4 - vision_config["intermediate_size"] = 32 - vision_config["out_hidden_size"] = 16 - - if issubclass(model_class.config_class, Qwen3_5MoeConfig): - text_config["num_experts"] = 4 - text_config["num_experts_per_tok"] = 2 - text_config["moe_intermediate_size"] = 32 - text_config["shared_expert_intermediate_size"] = 32 - - if model_id == "llava-hf/llava-v1.6-mistral-7b-hf": - # Hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakesly sets text_config.dtype to "bfloat16". 
- # See https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46 - text_config["dtype"] = None - - if model_class is Gemma4ForConditionalGeneration: - # Gemma4 rope validation fails when passing text_config as a dict, so we mutate the config directly. - config = AutoConfig.from_pretrained(model_id) - for k, v in text_config.items(): - setattr(config.text_config, k, v) - for k, v in vision_config.items(): - setattr(config.vision_config, k, v) - config.text_config.layer_types = ["sliding_attention", "full_attention"] - config.text_config.num_kv_shared_layers = 0 - config.text_config.global_head_dim = 8 - config.text_config.hidden_size_per_layer_input = 16 - config.audio_config = None - else: - config = AutoConfig.from_pretrained(model_id, text_config=text_config, vision_config=vision_config, **kwargs) - model = model_class(config).to(dtype=dtype) - - if model_id.startswith("Qwen/Qwen3.5"): - # Qwen3.5 models has some weights in float32, to mirror this in the tiny model we need to convert them to float32 manually. - # Qwen3.6 reuses the Qwen3_5Moe class but stores those weights in bf16, so the cast is not needed there. 
- for layer in model.model.language_model.layers: - if hasattr(layer, "linear_attn"): # applies to linear attention layers only - layer.linear_attn.A_log.data = layer.linear_attn.A_log.data.float() - layer.linear_attn.norm.weight.data = layer.linear_attn.norm.weight.data.float() - - suffix = "3.6" if model_id == "Qwen/Qwen3.6-35B-A3B" else None - push_to_hub(model, processor, generation_config, "tiny", suffix) - -# PEFT models -model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto") -model = get_peft_model(model, LoraConfig()) -generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM") -push_to_hub(model, None, None, "tiny") - -# Same model, but different weights -model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto") -model = get_peft_model(model, LoraConfig()) -generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM") -push_to_hub(model, None, None, "tiny", "2") diff --git a/scripts/generate_tiny_models/README.md b/scripts/generate_tiny_models/README.md new file mode 100644 index 00000000000..644039837ac --- /dev/null +++ b/scripts/generate_tiny_models/README.md @@ -0,0 +1,57 @@ +# Tiny model generation + +This directory contains one script per tiny model used by the TRL test suite. Each script builds a random-weight, minimally-sized model on top of a real tokenizer/processor and pushes it to the `trl-internal-testing` organization on the Hub. + +## Layout + +``` +generate_tiny_models/ +├── _common.py # shared helpers (push_to_hub, smoke_test, print_config_diff, ...) 
+├── for_causal_lm/ # *ForCausalLM + GPT-2 LM head + small/PEFT variants +├── for_sequence_classification/ # *ForSequenceClassification (reward models) +└── for_conditional_generation/ # *ForConditionalGeneration (VLMs + T5 + Bart encoder-decoder) +``` + +## Running + +From the repo root, invoke a script by its module path: + +```bash +python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm +``` + +Each script: + +1. Checks that the installed `transformers` version matches the one pinned in the script (fails otherwise). +2. Builds the tiny model with random weights. +3. Runs `smoke_test` — a minimal forward pass to catch config misspecification and NaNs. +4. Runs `check_dtype_pattern` — reads the reference safetensors header via the Hub API and flags any tensor whose dtype diverges from the reference (catches e.g. fp32 norms kept inside a bf16 checkpoint). +5. Runs `print_config_diff` — prints every flat-key difference between the reference Hub config and the tiny model's config (for debugging scale-downs). +6. Pushes the model, tokenizer/processor, generation config, and model card to the Hub in a single commit. + +If the repo already exists on the Hub, the push is skipped by default. Pass `--create-pr` to open a PR against the existing repo instead: + +```bash +python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm --create-pr +``` + +Direct overwrites of `main` aren't supported — update via `--create-pr` and merge the PR on the Hub. + +## Version pinning + +Every script declares `TRANSFORMERS_VERSION = "X.Y.Z"`, which is: + +``` +max(version that introduced the model, TRL's transformers floor) +``` + +The floor (currently `4.56.2`) is the `transformers>=` bound from `pyproject.toml`. Scripts for models introduced after the floor pin a higher version (e.g. Qwen3-VL pins `4.57.0`, Gemma4 pins `5.6.0`). The check is an exact match via `packaging.version.Version`; install the pinned version before running. 
+ +**Why exact?** transformers is backward-compatible (a checkpoint saved by X loads on any ≥ X) but not forward-compatible. TRL CI runs against the floor, so tiny models must be saved with the oldest version that supports them — any newer save risks using config fields the floor can't parse. The exact-match check prevents accidental drift. + +## Adding a new tiny model + +1. Pick the right subfolder based on the model class suffix (`ForCausalLM`, `ForSequenceClassification`, `ForConditionalGeneration`). +2. Copy an existing script with the closest shape and adapt it — reference model id, config class, model class, special kwargs. +3. Set `TRANSFORMERS_VERSION` to the release that introduced the model (or to the TRL floor, whichever is higher). +4. Run it. Inspect the `[smoke_test]` and `[config_diff]` output before letting it push. diff --git a/scripts/generate_tiny_models/__init__.py b/scripts/generate_tiny_models/__init__.py new file mode 100644 index 00000000000..3d26f4482fe --- /dev/null +++ b/scripts/generate_tiny_models/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py new file mode 100644 index 00000000000..e9b7c5c7acc --- /dev/null +++ b/scripts/generate_tiny_models/_common.py @@ -0,0 +1,265 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shared utilities for the tiny-model generation scripts in this directory.
# Each sibling script builds a single tiny model and pushes it to the Hub under
# the `trl-internal-testing` organization.

import argparse
import os
import tempfile

import torch
from huggingface_hub import CommitOperationAdd, HfApi, ModelCard
from packaging.version import Version
from torch import nn
from transformers import AutoConfig, ProcessorMixin


# Hub organization that owns every generated tiny model.
ORGANIZATION = "trl-internal-testing"

# Model card template; `{model_class_name}` is filled in by `push_to_hub`.
MODEL_CARD = """
---
library_name: transformers
tags: [trl]
---

# Tiny {model_class_name}

This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
"""


# Single Hub client shared by every helper in this module.
api = HfApi()


def check_transformers_version(expected_version):
    """
    Raise unless the installed transformers matches `expected_version` exactly.

    Args:
        expected_version: Version string the calling script is pinned to (e.g. `"4.56.2"`).

    Raises:
        RuntimeError: If the installed version differs, compared with `packaging.version.Version` semantics.
    """
    import transformers

    if Version(transformers.__version__) != Version(expected_version):
        raise RuntimeError(
            f"This script requires transformers=={expected_version}, but {transformers.__version__} is installed."
        )


def smoke_test(model, tokenizer_or_processor=None):
    """
    Run a minimal forward pass to sanity-check the tiny model doesn't crash or produce NaNs.

    The model is switched to eval mode. When `tokenizer_or_processor` is a `ProcessorMixin`, a batch of two
    single-turn conversations (each with one 24x24 image and a short text) is built through the processor's chat
    template; otherwise the model is fed the fixed token ids `[[1, 2, 3, 4]]`.

    Args:
        model: Model to exercise; inputs are created on the device of its first parameter.
        tokenizer_or_processor: Optional tokenizer or processor. Only a processor changes the input that is built.

    Raises:
        RuntimeError: If the output exposes neither `logits` nor `last_hidden_state`, or if that tensor has a NaN.
    """
    model.eval()
    device = next(model.parameters()).device

    if isinstance(tokenizer_or_processor, ProcessorMixin):
        # VLM path: build a dummy (image, text) input via the processor.
        from PIL import Image

        processor = tokenizer_or_processor
        red = Image.new("RGB", (24, 24), color="red")
        blue = Image.new("RGB", (24, 24), color="blue")
        # Two conversations with the image before and after the text, padded into a single batch.
        messages = [
            [
                {
                    "role": "user",
                    "content": [{"type": "image", "image": red}, {"type": "text", "text": "What is this?"}],
                }
            ],
            [{"role": "user", "content": [{"type": "text", "text": "Is it blue?"}, {"type": "image", "image": blue}]}],
        ]
        inputs = processor.apply_chat_template(
            conversation=messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
            padding=True,
        ).to(device)
    else:
        inputs = {"input_ids": torch.tensor([[1, 2, 3, 4]], device=device)}

    with torch.no_grad():
        out = model(**inputs)

    if "logits" in out:
        output_tensor = out["logits"]
    elif "last_hidden_state" in out:
        output_tensor = out["last_hidden_state"]
    else:
        raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: no logits or last_hidden_state on output")
    if torch.isnan(output_tensor).any():
        raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: NaN in forward output")
    print(f"[smoke_test] {model.__class__.__name__}: OK (output shape {tuple(output_tensor.shape)})")


def _flatten(d, prefix=""):
    """
    Flatten nested dicts into a single dict keyed by dotted paths.

    Only `dict` values are descended into; every other value (lists included) is stored as-is.

    Args:
        d: Possibly nested dictionary.
        prefix: Dotted prefix (with trailing `.`) prepended to every key; used by the recursion.

    Returns:
        A new flat dictionary, e.g. `{"a": {"b": 1}}` becomes `{"a.b": 1}`.
    """
    out = {}
    for k, v in d.items():
        key = f"{prefix}{k}" if prefix else k
        if isinstance(v, dict):
            out.update(_flatten(v, f"{key}."))
        else:
            out[key] = v
    return out


# Config keys skipped by `print_config_diff`, at the root or at the end of any dotted path.
_DIFF_IGNORE = {"_name_or_path", "transformers_version", "architectures", "model_type", "torch_dtype", "dtype"}

# Marks a key that one of the two configs does not define, so it can never compare equal to a real value.
_ABSENT = object()


# torch dtype -> dtype code as written in safetensors headers.
_TORCH_TO_SAFETENSORS_DTYPE = {
    torch.float32: "F32",
    torch.float16: "F16",
    torch.bfloat16: "BF16",
    torch.float64: "F64",
    torch.float8_e4m3fn: "F8_E4M3",
    torch.float8_e5m2: "F8_E5M2",
    torch.int8: "I8",
    torch.int16: "I16",
    torch.int32: "I32",
    torch.int64: "I64",
    torch.uint8: "U8",
    torch.bool: "BOOL",
}


def check_dtype_pattern(reference_id, model):
    """
    Flag tensors whose dtype diverges from the reference checkpoint.

    Reads the reference safetensors header via the Hub API (no weight download). Useful to catch cases
    like Qwen3.5 where specific params (e.g. linear_attn.A_log) are kept in fp32 while the rest is bf16.

    Only tensors whose name exists in both the tiny model's state dict and the reference are compared. The
    result is printed; nothing is raised on a mismatch.

    Args:
        reference_id: Hub repo id of the reference checkpoint.
        model: Tiny model whose `state_dict()` dtypes are checked.
    """
    metadata = api.get_safetensors_metadata(reference_id)
    ref_dtypes = {name: info.dtype for fm in metadata.files_metadata.values() for name, info in fm.tensors.items()}

    matched = 0
    mismatches = []
    for name, tensor in model.state_dict().items():
        ref_dtype = ref_dtypes.get(name)
        if ref_dtype is None:
            continue  # tensor has no counterpart in the reference (e.g. scale-down, PEFT wrapper, tying)
        matched += 1
        # Fall back to the torch name so an unmapped dtype is reported as itself rather than as `None`.
        tiny_dtype = _TORCH_TO_SAFETENSORS_DTYPE.get(tensor.dtype, str(tensor.dtype))
        if tiny_dtype != ref_dtype:
            mismatches.append((name, ref_dtype, tiny_dtype))

    if matched == 0:
        # Without this, the "all matched" message below would be printed although nothing was compared.
        print(f"[dtype_check] {reference_id}: no tensor name in common with the reference, nothing was compared")
        return

    if not mismatches:
        print(f"[dtype_check] {reference_id}: all matched tensors have the reference dtype")
        return

    print(f"[dtype_check] {reference_id}: {len(mismatches)} tensors differ from reference:")
    for name, ref, tiny in mismatches:
        print(f"  {name}: reference={ref}, tiny={tiny}")


def _render_config_value(value):
    """Return the text shown in the config diff for `value`; absent keys are shown as `<missing>`."""
    return "<missing>" if value is _ABSENT else str(value)


def print_config_diff(reference_id, model):
    """
    Print the flat, recursive diff between the reference Hub config and the tiny-model config.

    Both configs are flattened to dotted keys. Keys listed in `_DIFF_IGNORE` are skipped. A key defined on one
    side only is always reported and rendered as `<missing>` on the other side.

    Args:
        reference_id: Hub repo id whose config is loaded with `AutoConfig.from_pretrained`.
        model: Tiny model; its `config` is the other side of the comparison.
    """
    reference_config = AutoConfig.from_pretrained(reference_id)
    ref_flat = _flatten(reference_config.to_dict())
    tiny_flat = _flatten(model.config.to_dict())

    keys = sorted(set(ref_flat) | set(tiny_flat))
    rows = []
    for k in keys:
        if any(k == ig or k.endswith(f".{ig}") for ig in _DIFF_IGNORE):
            continue
        rv, tv = ref_flat.get(k, _ABSENT), tiny_flat.get(k, _ABSENT)
        if rv != tv:
            rows.append((k, rv, tv))

    print(f"[config_diff] {reference_id} vs tiny ({len(rows)} differences)")
    for k, r, t in rows:
        # Values are truncated to 34 characters to keep the table aligned.
        print(f"  {k:48s} {_render_config_value(r)[:34]:34s} → {_render_config_value(t)[:34]}")


def _parse_args():
    """
    Parse the command-line flags shared by all generation scripts.

    Unknown arguments are ignored (`parse_known_args`) and no `-h/--help` option is registered.

    Returns:
        The parsed namespace; `create_pr` is `True` when `--create-pr` was passed.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument(
        "--create-pr",
        action="store_true",
        help="If the repo already exists, open a PR instead of skipping.",
    )
    args, _ = parser.parse_known_args()
    return args


def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, create_pr=None):
    """
    Upload the model and its companion files to `{ORGANIZATION}/[{prefix}-]{ModelClass}[-{suffix}]`.

    Everything is saved to a temporary directory and uploaded in one commit. If the repo already exists, nothing
    is uploaded unless `create_pr` is true, in which case the commit is opened as a PR. A missing repo is created
    and committed to directly.

    Args:
        model: Model to upload; its class name is used for the repo id and the model card title.
        tokenizer: Object with `save_pretrained` (tokenizer or processor) saved next to the model, or `None`.
        generation_config: Generation config saved next to the model, or `None`.
        prefix: Optional string prepended to the model class name in the repo id.
        suffix: Optional string appended to the repo id.
        create_pr: Whether to open a PR when the repo exists. `None` reads the `--create-pr` command-line flag.
    """
    if create_pr is None:
        create_pr = _parse_args().create_pr

    model_class_name = model.__class__.__name__
    content = MODEL_CARD.format(model_class_name=model_class_name)
    model_card = ModelCard(content)
    if prefix is not None:
        model_class_name = f"{prefix}-{model_class_name}"
    repo_id = f"{ORGANIZATION}/{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"

    exists = api.repo_exists(repo_id)
    if exists and not create_pr:
        print(f"Model {repo_id} already exists, skipping (pass --create-pr to open a PR)")
        return

    if not exists:
        api.create_repo(repo_id, exist_ok=True)

    # Save all artifacts to a temp dir and upload them in a single commit, so --create-pr opens one PR.
    with tempfile.TemporaryDirectory() as tmpdir:
        model.save_pretrained(tmpdir)
        if tokenizer is not None:
            tokenizer.save_pretrained(tmpdir)
        if generation_config is not None:
            generation_config.save_pretrained(tmpdir)
        model_card.save(os.path.join(tmpdir, "README.md"))

        operations = []
        for root, _, files in os.walk(tmpdir):
            for name in files:
                local_path = os.path.join(root, name)
                # Repo paths use "/" regardless of the local OS separator (matters for nested files on Windows).
                path_in_repo = os.path.relpath(local_path, tmpdir).replace(os.sep, "/")
                operations.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))

        # Must run inside the `with` block: the files are read from the temp dir at commit time.
        commit_info = api.create_commit(
            repo_id=repo_id,
            operations=operations,
            commit_message=f"Upload {model.__class__.__name__}",
            create_pr=exists and create_pr,
        )
        if commit_info.pr_url:
            print(f"[push_to_hub] PR opened: {commit_info.pr_url}")


def init_weights_tiny_model(model):
    """
    Initialize tiny test models to avoid NaNs from uninitialized weights.

    Uses safe defaults:
    - Linear/Conv1d: Xavier uniform (weights), zero (biases)
    - Embedding: Normal(0, 0.02)
    - LayerNorm: Ones (weights), zero (biases)

    Parameters that a module does not have (`None`, e.g. a bias-free `Linear` or a `LayerNorm` built with
    `elementwise_affine=False`) are skipped. Modules of any other type are left untouched.

    Args:
        model: PyTorch model (modified in-place).
    """
    for module in model.modules():
        if isinstance(module, (nn.Linear, nn.Conv1d)):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)

        elif isinstance(module, nn.LayerNorm):
            # Both affine parameters are optional on LayerNorm.
            if module.weight is not None:
                nn.init.ones_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
import torch
from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny Cohere2ForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "CohereLabs/tiny-aya-earth"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Cohere2Config(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = Cohere2ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny CohereForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "CohereLabs/aya-expanse-8b"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = CohereConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = CohereForCausalLM(config)
model = model.to(dtype=torch.float16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny DeepseekV3ForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "deepseek-ai/DeepSeek-R1"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = DeepseekV3Config(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = DeepseekV3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
# Note: R1-0528 is kept in addition to R1 because it has a different chat template.

import torch
from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny DeepseekV3ForCausalLM and pushes it to the Hub with the "tiny" prefix and "0528" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "deepseek-ai/DeepSeek-R1-0528"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = DeepseekV3Config(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = DeepseekV3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="0528")
import torch
from transformers import AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny FalconMambaForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "tiiuae/falcon-7b-instruct"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = FalconMambaConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = FalconMambaForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny Gemma2ForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "google/gemma-2-2b-it"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Gemma2Config(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = Gemma2ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny GemmaForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "google/gemma-7b-it"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = GemmaConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = GemmaForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny Glm4MoeForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "zai-org/GLM-4.5"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions, including the expert counts; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
    "n_routed_experts": 4,
    "num_experts_per_tok": 2,
}
config = Glm4MoeConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = Glm4MoeForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny GPT2LMHeadModel and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "openai-community/gpt2"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = GPT2Config(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = GPT2LMHeadModel(config)
model = model.to(dtype=torch.float32)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny GPTNeoXForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "EleutherAI/pythia-14m"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = GPTNeoXConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = GPTNeoXForCausalLM(config)
model = model.to(dtype=torch.float16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny GptOssForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "openai/gpt-oss-20b"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions, including the expert counts; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
    "num_local_experts": 4,
    "num_experts_per_tok": 2,
}
config = GptOssConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = GptOssForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
import torch
from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny LlamaForCausalLM and pushes it to the Hub with the "tiny" prefix and "3" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = LlamaConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = LlamaForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="3")
import torch
from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny LlamaForCausalLM and pushes it to the Hub with the "tiny" prefix and "3.1" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = LlamaConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = LlamaForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="3.1")
import torch
from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny LlamaForCausalLM and pushes it to the Hub with the "tiny" prefix and "3.2" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = LlamaConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = LlamaForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="3.2")
import torch
from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny MistralForCausalLM and pushes it to the Hub with the "tiny" prefix and "0.1" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = MistralConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = MistralForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="0.1")
import torch
from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny MistralForCausalLM and pushes it to the Hub with the "tiny" prefix and "0.2" suffix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = MistralConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = MistralForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny", suffix="0.2")
import torch
from transformers import AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


# Builds a tiny OPTForCausalLM and pushes it to the Hub with the "tiny" prefix.
# The tokenizer and generation config are loaded from MODEL_ID; the model itself is built from a fresh config.

TRANSFORMERS_VERSION = "4.56.2"
MODEL_ID = "facebook/opt-1.3b"

check_transformers_version(TRANSFORMERS_VERSION)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Tiny architecture dimensions; the vocabulary size follows the reference tokenizer.
tiny_dims = {
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = OPTConfig(vocab_size=len(tokenizer.vocab), **tiny_dims)

model = OPTForCausalLM(config)
model = model.to(dtype=torch.float16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, prefix="tiny")
# File: scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py
#
# Wraps the already published tiny Qwen3 base model with a default LoraConfig via `get_peft_model`.
# No tokenizer or generation config is passed along (both arguments are None).

from peft import LoraConfig, get_peft_model
from transformers import Qwen3ForCausalLM

from .._common import check_transformers_version, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM"

base_model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto")
lora_config = LoraConfig()
model = get_peft_model(base_model, lora_config)

smoke_test(model, None)
push_to_hub(model, None, None, "tiny")
# File: scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py
#
# Same model class as peft_qwen3_for_causal_lm.py, with different (random) LoRA weights.
# The only difference in this script is the extra "2" suffix passed to push_to_hub.

from peft import LoraConfig, get_peft_model
from transformers import Qwen3ForCausalLM

from .._common import check_transformers_version, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM"

base_model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto")
lora_config = LoraConfig()
model = get_peft_model(base_model, lora_config)

smoke_test(model, None)
push_to_hub(model, None, None, "tiny", "2")
# File: scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py
#
# Builds a tiny Phi3ForCausalLM that reuses the tokenizer and generation config of the Phi-3 reference checkpoint,
# then passes it through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Phi3Config(**tiny_dims)

model = Phi3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny", "3")
# File: scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py
#
# Builds a tiny Phi3ForCausalLM that reuses the tokenizer and generation config of the Phi-3.5 reference checkpoint,
# then passes it through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Phi3Config(**tiny_dims)

model = Phi3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny", "3.5")
# File: scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py
#
# Builds a tiny Qwen2ForCausalLM that reuses the tokenizer and generation config of the Qwen2.5 reference
# checkpoint, then passes it through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen2Config(**tiny_dims)

model = Qwen2ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny", "2.5")
# File: scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py
#
# Builds a tiny Qwen2ForCausalLM that reuses the tokenizer and generation config of the Qwen2.5-Coder reference
# checkpoint, then passes it through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen2Config(**tiny_dims)

model = Qwen2ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny", "2.5-Coder")
# File: scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py
#
# Builds a tiny Qwen3ForCausalLM. The tokenizer and generation config are loaded from a pinned revision of the
# reference checkpoint; the model is then passed through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen3-8B"
# Revision pins the chat template PR with `{% generation %}` support.
REVISION = "refs/pr/14"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)
generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=REVISION)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen3Config(**tiny_dims)

model = Qwen3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py
#
# Qwen3-4B-Instruct-2507 ships the non-thinking chat template, distinct from the default Qwen3 template.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# The vocabulary size follows the reference tokenizer; the listed dimensions are set to small values.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen3Config(**tiny_dims)

model = Qwen3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny", "Instruct-2507")
# File: scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py
#
# Builds a tiny Qwen3MoeForCausalLM that reuses the tokenizer and generation config of the reference checkpoint,
# then passes it through the shared `_common` helpers in the order below.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM

from .._common import (
    check_dtype_pattern,
    check_transformers_version,
    init_weights_tiny_model,
    print_config_diff,
    push_to_hub,
    smoke_test,
)


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen3-30B-A3B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Same small dimensions as the dense scripts, plus the two expert-count settings.
tiny_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 8,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
    "num_experts": 4,
    "num_experts_per_tok": 2,
}
config = Qwen3MoeConfig(**tiny_dims)

model = Qwen3MoeForCausalLM(config)
model = model.to(dtype=torch.bfloat16)
init_weights_tiny_model(model)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py
#
# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32.
# Unlike the "tiny" scripts, this one does not call `init_weights_tiny_model`.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# hidden_size=128 with 4 attention heads gives the 128 // 4 = 32 ratio mentioned above.
small_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 128,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen2Config(**small_dims)

model = Qwen2ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "small", "2.5")
# File: scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py
#
# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32.
# Unlike the "tiny" scripts, this one does not call `init_weights_tiny_model`.

import torch
from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "Qwen/Qwen3-4B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# hidden_size=128 with 4 attention heads gives the 128 // 4 = 32 ratio mentioned above.
small_dims = {
    "vocab_size": len(tokenizer.vocab),
    "hidden_size": 128,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
    "num_hidden_layers": 2,
    "intermediate_size": 32,
}
config = Qwen3Config(**small_dims)

model = Qwen3ForCausalLM(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, generation_config, "small")
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scripts/generate_tiny_models/for_conditional_generation/bart_model.py b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py new file mode 100644 index 00000000000..c754515af72 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py @@ -0,0 +1,33 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# File: scripts/generate_tiny_models/for_conditional_generation/bart_model.py
#
# Builds a BartModel from the reference checkpoint's config with only `d_model` reduced.
# No generation config is passed along (that argument is None).

import torch
from transformers import AutoConfig, AutoTokenizer, BartModel

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "facebook/bart-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Start from the full reference config and shrink a single field in place.
config = AutoConfig.from_pretrained(MODEL_ID)
config.d_model = 24

model = BartModel(config)
model = model.to(dtype=torch.float32)

smoke_test(model, tokenizer)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, tokenizer, None, "tiny")
# File: scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py
#
# Builds a tiny Gemma3ForConditionalGeneration: the reference config is loaded with small text/vision sub-config
# dicts passed as keyword arguments, and the processor and generation config come from the reference checkpoint.

import torch
from transformers import AutoConfig, AutoProcessor, Gemma3ForConditionalGeneration, GenerationConfig

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "google/gemma-3-4b-it"

processor = AutoProcessor.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Settings shared by both sub-configs; each one then appends its own extra key.
shared_dims = {
    "num_hidden_layers": 2,
    "hidden_size": 16,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
}
text_config = {**shared_dims, "layer_types": None}  # None: set it automatically from num_hidden_layers
vision_config = {**shared_dims, "embed_dim": 64}

config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)

model = Gemma3ForConditionalGeneration(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, processor)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, processor, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py
#
# Gemma4 rope validation fails when passing text_config as a dict through AutoConfig,
# so the config is loaded first and then mutated in place.

import torch
from transformers import AutoConfig, AutoProcessor, Gemma4ForConditionalGeneration, GenerationConfig

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "5.6.0"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "google/gemma-4-E2B-it"

processor = AutoProcessor.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Settings shared by both sub-configs; the vision one appends its own extra key.
shared_dims = {
    "num_hidden_layers": 2,
    "hidden_size": 16,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
}
text_config = dict(shared_dims)
vision_config = {**shared_dims, "embed_dim": 64}

config = AutoConfig.from_pretrained(MODEL_ID)

# Apply the overrides attribute by attribute: text sub-config first, then vision.
for sub_config_name, overrides in (("text_config", text_config), ("vision_config", vision_config)):
    for field_name, field_value in overrides.items():
        setattr(getattr(config, sub_config_name), field_name, field_value)

# Additional text-side fields set after the generic overrides, followed by clearing the audio sub-config.
config.text_config.layer_types = ["sliding_attention", "full_attention"]
config.text_config.num_kv_shared_layers = 0
config.text_config.global_head_dim = 8
config.text_config.hidden_size_per_layer_input = 16
config.audio_config = None

model = Gemma4ForConditionalGeneration(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, processor)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, processor, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py
#
# Builds a tiny Idefics2ForConditionalGeneration: the reference config is loaded with small text/vision/perceiver
# sub-config dicts passed as keyword arguments, and the processor and generation config come from the reference
# checkpoint.

import torch
from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics2ForConditionalGeneration

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "HuggingFaceM4/idefics2-8b"

processor = AutoProcessor.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Settings shared by the text and vision sub-configs; each one then appends its own extra key.
shared_dims = {
    "num_hidden_layers": 2,
    "hidden_size": 16,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
}
text_config = {**shared_dims, "layer_types": None}
vision_config = {**shared_dims, "embed_dim": 64}

# Keyword arguments forwarded to AutoConfig, in the same order as the explicit call they replace.
sub_config_kwargs = {
    "text_config": text_config,
    "vision_config": vision_config,
    "perceiver_config": {"hidden_size": 16},
}
config = AutoConfig.from_pretrained(MODEL_ID, **sub_config_kwargs)

model = Idefics2ForConditionalGeneration(config)
model = model.to(dtype=torch.float32)

smoke_test(model, processor)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, processor, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py
#
# Builds a tiny Idefics3ForConditionalGeneration: the reference config is loaded with small text/vision sub-config
# dicts passed as keyword arguments, and the processor and generation config come from the reference checkpoint.

import torch
from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics3ForConditionalGeneration

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "HuggingFaceM4/Idefics3-8B-Llama3"

processor = AutoProcessor.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Settings shared by both sub-configs; each one then appends its own extra key.
shared_dims = {
    "num_hidden_layers": 2,
    "hidden_size": 16,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
}
text_config = {**shared_dims, "layer_types": None}
vision_config = {**shared_dims, "embed_dim": 64}

config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)

model = Idefics3ForConditionalGeneration(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, processor)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, processor, generation_config, "tiny")
# File: scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py
#
# Builds a tiny InternVLForConditionalGeneration: the reference config is loaded with small text/vision sub-config
# dicts passed as keyword arguments, and the processor and generation config come from the reference checkpoint.

import torch
from transformers import AutoConfig, AutoProcessor, GenerationConfig, InternVLForConditionalGeneration

from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test


TRANSFORMERS_VERSION = "4.56.2"
check_transformers_version(TRANSFORMERS_VERSION)

MODEL_ID = "OpenGVLab/InternVL3-8B-hf"

processor = AutoProcessor.from_pretrained(MODEL_ID)
generation_config = GenerationConfig.from_pretrained(MODEL_ID)

# Settings shared by both sub-configs; each one then appends its own extra key.
shared_dims = {
    "num_hidden_layers": 2,
    "hidden_size": 16,
    "num_attention_heads": 4,
    "num_key_value_heads": 2,
}
text_config = {**shared_dims, "layer_types": None}
vision_config = {**shared_dims, "embed_dim": 64}

config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)

model = InternVLForConditionalGeneration(config)
model = model.to(dtype=torch.bfloat16)

smoke_test(model, processor)
check_dtype_pattern(MODEL_ID, model)
print_config_diff(MODEL_ID, model)
push_to_hub(model, processor, generation_config, "tiny")
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "llava-hf/llava-1.5-7b-hf" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = LlavaForConditionalGeneration(config).to(dtype=torch.float16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py new file mode 100644 index 00000000000..875fe0545da --- /dev/null +++ 
b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py @@ -0,0 +1,55 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Original model dtype is float16, but it triggers CUDA device-side assert on generation (see GH-4741), +# so this tiny model is saved in bfloat16. +# Upstream hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakenly sets text_config.dtype to "bfloat16" +# (see https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46), which we clear here. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaNextForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "dtype": None, # hotfix for upstream text_config.dtype = "bfloat16" +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = LlavaNextForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py new file mode 100644 index 00000000000..5b5fa67ec9d --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py @@ -0,0 +1,49 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, PaliGemmaForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/paligemma-3b-pt-224" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = PaliGemmaForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py new file mode 100644 index 00000000000..2eab8826124 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py @@ -0,0 +1,63 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: Qwen2.5-VL requires out_hidden_size on the vision config, plus root-level num_hidden_layers/hidden_size/ +# num_attention_heads (distinct from the text_config fields). See GH-4101 and transformers#41020. + +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2_5_VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained( + MODEL_ID, + text_config=text_config, + vision_config=vision_config, + rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, + num_hidden_layers=2, + hidden_size=16, + num_attention_heads=4, +) +model = 
Qwen2_5_VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py new file mode 100644 index 00000000000..b3fdf792418 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py @@ -0,0 +1,58 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: two distinct rope_scaling dict objects (root and text_config). See GH-4101 and transformers#41020. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, +} + +config = AutoConfig.from_pretrained( + MODEL_ID, + text_config=text_config, + vision_config=vision_config, + rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +) +model = Qwen2VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py new file mode 100644 index 00000000000..ce0d0dfd0e7 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py @@ -0,0 +1,64 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - Qwen3.5 auto-builds layer_types from num_hidden_layers with default interval 4, so tiny models +# (2 layers) end up all-linear-attention, which breaks dynamic cache. Force one full-attention layer. +# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`). +# - Qwen3.5 has no published generation_config on the Hub yet. +# - Qwen3.5 keeps some linear-attn weights in float32; we cast them back after the bfloat16 conversion. + +import torch +from transformers import AutoConfig, AutoProcessor, Qwen3_5ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "5.2.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3.5-0.8B" + +processor = AutoProcessor.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": ["linear_attention", "full_attention"], + "full_attention_interval": 2, +} +vision_config = { + "hidden_size": 16, + "depth": 2, + "num_heads": 4, + "intermediate_size": 32, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3_5ForConditionalGeneration(config).to(dtype=torch.bfloat16) + +# Restore float32 for linear-attn weights that the upstream model keeps in fp32. 
+for i, layer_type in enumerate(config.text_config.layer_types): + if layer_type == "linear_attention": + linear_attn = model.model.language_model.layers[i].linear_attn + linear_attn.A_log.data = linear_attn.A_log.data.float() + linear_attn.norm.weight.data = linear_attn.norm.weight.data.float() + +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, None, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py new file mode 100644 index 00000000000..a0f5bef6e7a --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py @@ -0,0 +1,63 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - Qwen3.6 reuses the Qwen3_5Moe class with extra MoE config fields +# (num_experts, num_experts_per_tok, moe_intermediate_size, shared_expert_intermediate_size). +# - Same layer_types/full_attention_interval workaround as Qwen3.5: tiny models (2 layers) need +# one full-attention layer to keep the dynamic cache happy. +# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`). +# - Unlike Qwen3.5, Qwen3.6 stores linear-attn weights in bf16, so no float32 cast is needed. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3_5MoeForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "5.2.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3.6-35B-A3B" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": ["linear_attention", "full_attention"], + "full_attention_interval": 2, + "num_experts": 4, + "num_experts_per_tok": 2, + "moe_intermediate_size": 32, + "shared_expert_intermediate_size": 32, +} +vision_config = { + "hidden_size": 16, + "depth": 2, + "num_heads": 4, + "intermediate_size": 32, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3_5MoeForConditionalGeneration(config).to(dtype=torch.bfloat16) + +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny", "3.6") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py new file mode 100644 index 00000000000..f04fb4cfadf --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py @@ -0,0 +1,57 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - "layer_types" is intentionally omitted from text_config: qwen3_vl's modeling code checks +# `hasattr(config, "layer_types")` and uses a different path when absent +# (see transformers/models/qwen3_vl/modeling_qwen3_vl.py). +# - mrope_section needs 3 elements (for dim, offset in enumerate((1, 2), start=1): mrope_section[dim]). + +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.57.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-VL-2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "rope_scaling": {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff 
--git a/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py new file mode 100644 index 00000000000..b019e6d084b --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py @@ -0,0 +1,49 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, SmolVLMForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = SmolVLMForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, processor) 
+check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py new file mode 100644 index 00000000000..451e7b5ecc9 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py @@ -0,0 +1,34 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, T5ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/flan-t5-small" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained(MODEL_ID) +config.d_model = 24 +model = T5ForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_sequence_classification/__init__.py b/scripts/generate_tiny_models/for_sequence_classification/__init__.py new file mode 100644 index 00000000000..3d26f4482fe --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py new file mode 100644 index 00000000000..c0e270ad050 --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py @@ -0,0 +1,49 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXForSequenceClassification + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "EleutherAI/pythia-14m" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = GPTNeoXForSequenceClassification(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py new file mode 100644 index 00000000000..e082ed94656 --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py @@ -0,0 +1,49 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaForSequenceClassification + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = LlamaForSequenceClassification(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3.2") diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py new file mode 100644 index 00000000000..6b83cf4204f --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py @@ -0,0 +1,51 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2ForSequenceClassification + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-32B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +# Bug in transformers: it ignores num_hidden_layers to build layer_types +config.layer_types = config.layer_types[:2] +model = Qwen2ForSequenceClassification(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "2.5") diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py new file mode 100644 index 00000000000..fa05dcc1105 --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py @@ -0,0 +1,51 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3ForSequenceClassification + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-4B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +# Bug in transformers: it ignores num_hidden_layers to build layer_types +config.layer_types = config.layer_types[:2] +model = Qwen3ForSequenceClassification(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py new file mode 100644 index 00000000000..b89842afbaa --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py @@ -0,0 +1,43 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeForSequenceClassification + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-30B-A3B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, + num_experts=4, + num_experts_per_tok=2, +) +model = Qwen3MoeForSequenceClassification(config).to(dtype=torch.bfloat16) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny")