diff --git a/scripts/generate_tiny_models.py b/scripts/generate_tiny_models.py
deleted file mode 100644
index 73b7d166949..00000000000
--- a/scripts/generate_tiny_models.py
+++ /dev/null
@@ -1,450 +0,0 @@
-# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the
-# `trl-internal-testing` organization.
-# This script is meant to be run when adding new tiny model to the TRL library.
-
-import torch
-from huggingface_hub import HfApi, ModelCard
-from peft import LoraConfig, get_peft_model
-from torch import nn
-from transformers import (
-    AutoConfig,
-    AutoProcessor,
-    AutoTokenizer,
-    BartModel,
-    Cohere2Config,
-    Cohere2ForCausalLM,
-    CohereConfig,
-    CohereForCausalLM,
-    DeepseekV3Config,
-    DeepseekV3ForCausalLM,
-    FalconMambaConfig,
-    FalconMambaForCausalLM,
-    Gemma2Config,
-    Gemma2ForCausalLM,
-    Gemma3ForConditionalGeneration,
-    Gemma4ForConditionalGeneration,
-    GemmaConfig,
-    GemmaForCausalLM,
-    GenerationConfig,
-    Glm4MoeConfig,
-    Glm4MoeForCausalLM,
-    GPT2Config,
-    GPT2LMHeadModel,
-    GPTNeoXConfig,
-    GPTNeoXForCausalLM,
-    GPTNeoXForSequenceClassification,
-    GptOssConfig,
-    GptOssForCausalLM,
-    Idefics2Config,
-    Idefics2ForConditionalGeneration,
-    Idefics3ForConditionalGeneration,
-    InternVLForConditionalGeneration,
-    LlamaConfig,
-    LlamaForCausalLM,
-    LlamaForSequenceClassification,
-    LlavaForConditionalGeneration,
-    LlavaNextForConditionalGeneration,
-    MistralConfig,
-    MistralForCausalLM,
-    OPTConfig,
-    OPTForCausalLM,
-    PaliGemmaForConditionalGeneration,
-    Phi3Config,
-    Phi3ForCausalLM,
-    Qwen2_5_VLConfig,
-    Qwen2_5_VLForConditionalGeneration,
-    Qwen2Config,
-    Qwen2ForCausalLM,
-    Qwen2ForSequenceClassification,
-    Qwen2VLConfig,
-    Qwen2VLForConditionalGeneration,
-    Qwen3_5Config,
-    Qwen3_5ForConditionalGeneration,
-    Qwen3_5MoeConfig,
-    Qwen3_5MoeForConditionalGeneration,
-    Qwen3Config,
-    Qwen3ForCausalLM,
-    Qwen3ForSequenceClassification,
-    Qwen3MoeConfig,
-    Qwen3MoeForCausalLM,
-    Qwen3MoeForSequenceClassification,
-    Qwen3VLConfig,
-    Qwen3VLForConditionalGeneration,
-    SmolVLMForConditionalGeneration,
-    T5ForConditionalGeneration,
-)
-
-
-ORGANIZATION = "trl-internal-testing"
-
-MODEL_CARD = """
----
-library_name: transformers
-tags: [trl]
----
-
-# Tiny {model_class_name}
-
-This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
-"""
-
-
-api = HfApi()
-
-
-def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, force=False):
-    model_class_name = model.__class__.__name__
-    content = MODEL_CARD.format(model_class_name=model_class_name)
-    model_card = ModelCard(content)
-    if prefix is not None:
-        model_class_name = f"{prefix}-{model_class_name}"
-    repo_id = f"{ORGANIZATION}/{model_class_name}"
-    if suffix is not None:
-        repo_id += f"-{suffix}"
-
-    if api.repo_exists(repo_id) and not force:
-        print(f"Model {repo_id} already exists, skipping")
-    else:
-        model.push_to_hub(repo_id)
-        model_card.push_to_hub(repo_id)
-        if tokenizer is not None:
-            tokenizer.push_to_hub(repo_id)
-        if generation_config is not None:
-            generation_config.push_to_hub(repo_id)
-
-
-def init_weights_tiny_model(model):
-    """
-    Initialize tiny test models to avoid NaNs from uninitialized weights.
-
-    Uses safe defaults:
-      - Linear/Conv1d: Xavier uniform (weights), zero (biases)
-      - Embedding: Normal(0, 0.02)
-      - LayerNorm: Ones (weights), zero (biases)
-
-    Args:
-        model: PyTorch model (modified in-place)
-    """
-    for module in model.modules():
-        if isinstance(module, nn.Linear):
-            # Attention/MLP projections → Xavier or Normal
-            if module.bias is not None:
-                nn.init.zeros_(module.bias)
-            nn.init.xavier_uniform_(module.weight)
-
-        elif isinstance(module, nn.Embedding):
-            # Token embeddings → GPT-style Normal
-            nn.init.normal_(module.weight, mean=0.0, std=0.02)
-
-        elif isinstance(module, nn.LayerNorm):
-            # LayerNorm weights always 1, bias 0
-            nn.init.ones_(module.weight)
-            if module.bias is not None:
-                nn.init.zeros_(module.bias)
-
-        elif isinstance(module, nn.Conv1d):
-            # Convolutional layers → Xavier or Normal
-            if module.bias is not None:
-                nn.init.zeros_(module.bias)
-            nn.init.xavier_uniform_(module.weight)
-
-
-# Decoder models
-for model_id, config_class, model_class, dtype, suffix in [
-    # ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None),  # loading fails with this model, see https://huggingface.co/bigscience/bloomz-560m/discussions/14
-    ("CohereLabs/aya-expanse-8b", CohereConfig, CohereForCausalLM, torch.float16, None),
-    ("CohereLabs/tiny-aya-earth", Cohere2Config, Cohere2ForCausalLM, torch.bfloat16, None),
-    ("deepseek-ai/DeepSeek-R1", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, None),
-    # It's important to have R1-0528 as it doesn't have the same chat template
-    ("deepseek-ai/DeepSeek-R1-0528", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, "0528"),
-    ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, torch.bfloat16, None),
-    ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, torch.bfloat16, None),
-    ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, torch.bfloat16, None),
-    ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, torch.float32, None),
-    ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, torch.float16, None),
-    ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3"),
-    ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.1"),
-    ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.2"),
-    ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.1"),
-    ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.2"),
-    ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, torch.float16, None),
-    ("microsoft/Phi-3-mini-4k-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3"),
-    ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3.5"),
-    ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5"),
-    ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5-Coder"),
-    ("Qwen/Qwen3-8B", Qwen3Config, Qwen3ForCausalLM, torch.bfloat16, None),
-    # It's important to have Qwen3-4B-Instruct-2507 as it doesn't have the same chat template (non-thinking variant)
-    ("Qwen/Qwen3-4B-Instruct-2507", Qwen3Config, Qwen3ForCausalLM, torch.bfloat16, "Instruct-2507"),
-]:
-    revision = "refs/pr/14" if model_id == "Qwen/Qwen3-8B" else "main"  # chat template with {% generation %}
-    tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
-    generation_config = GenerationConfig.from_pretrained(model_id, revision=revision)
-    config = config_class(
-        vocab_size=len(tokenizer.vocab),
-        hidden_size=8,
-        num_attention_heads=4,
-        num_key_value_heads=2,
-        num_hidden_layers=2,
-        intermediate_size=32,
-    )
-    model = model_class(config).to(dtype=dtype)
-    init_weights_tiny_model(model)
-    push_to_hub(model, tokenizer, generation_config, "tiny", suffix)
-
-# MoE models
-for model_id, config_class, model_class, dtype, suffix in [
-    ("Qwen/Qwen3-30B-A3B", Qwen3MoeConfig, Qwen3MoeForCausalLM, torch.bfloat16, None),
-    ("openai/gpt-oss-20b", GptOssConfig, GptOssForCausalLM, torch.bfloat16, None),
-    ("zai-org/GLM-4.5", Glm4MoeConfig, Glm4MoeForCausalLM, torch.bfloat16, None),
-]:
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    generation_config = GenerationConfig.from_pretrained(model_id)
-    kwargs = {}
-    if model_id == "zai-org/GLM-4.5":
-        kwargs["n_routed_experts"] = 4
-    elif model_id == "Qwen/Qwen3-30B-A3B":
-        kwargs["num_experts"] = 4
-    elif model_id == "openai/gpt-oss-20b":
-        kwargs["num_local_experts"] = 4
-
-    config = config_class(
-        vocab_size=len(tokenizer.vocab),
-        hidden_size=8,
-        num_attention_heads=4,
-        num_key_value_heads=2,
-        num_hidden_layers=2,
-        intermediate_size=32,
-        num_experts_per_tok=2,
-        **kwargs,
-    )
-    model = model_class(config).to(dtype=dtype)
-    init_weights_tiny_model(model)
-    push_to_hub(model, tokenizer, generation_config, "tiny", suffix)
-
-# Two slightly bigger models, required for vLLM testing
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-32B-Instruct")
-generation_config = GenerationConfig.from_pretrained("Qwen/Qwen2.5-32B-Instruct")
-config = Qwen2Config(
-    vocab_size=len(tokenizer.vocab),
-    hidden_size=128,  # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM
-    num_attention_heads=4,
-    num_key_value_heads=2,
-    num_hidden_layers=2,
-    intermediate_size=32,
-)
-model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16)
-push_to_hub(model, tokenizer, generation_config, "small", "2.5")
-
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B")
-generation_config = GenerationConfig.from_pretrained("Qwen/Qwen3-4B")
-config = Qwen3Config(
-    vocab_size=len(tokenizer.vocab),
-    hidden_size=128,  # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM
-    num_attention_heads=4,
-    num_key_value_heads=2,
-    num_hidden_layers=2,
-    intermediate_size=32,
-)
-model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16)
-push_to_hub(model, tokenizer, generation_config, "small")
-
-# Reward models
-for model_id, model_class, dtype, suffix in [
-    ("EleutherAI/pythia-14m", GPTNeoXForSequenceClassification, torch.bfloat16, None),
-    ("meta-llama/Llama-3.2-1B-Instruct", LlamaForSequenceClassification, torch.bfloat16, "3.2"),
-    ("Qwen/Qwen2.5-32B-Instruct", Qwen2ForSequenceClassification, torch.bfloat16, "2.5"),
-    ("Qwen/Qwen3-4B", Qwen3ForSequenceClassification, torch.bfloat16, None),
-]:
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    generation_config = GenerationConfig.from_pretrained(model_id)
-    kwargs = {
-        "num_labels": 1,
-        "hidden_size": 16,
-        "num_attention_heads": 4,
-        "num_key_value_heads": 2,
-        "num_hidden_layers": 2,
-        "intermediate_size": 32,
-    }
-    config = AutoConfig.from_pretrained(model_id, **kwargs)
-    # Bug in transformers: it ignores num_hidden_layers to build layer_types
-    if model_id in ("Qwen/Qwen2.5-32B-Instruct", "Qwen/Qwen3-4B"):
-        config.layer_types = config.layer_types[:2]
-    model = model_class(config).to(dtype=dtype)
-    init_weights_tiny_model(model)
-    push_to_hub(model, tokenizer, generation_config, "tiny", suffix)
-
-# MoE Reward models
-for model_id, model_class, dtype, suffix in [
-    ("Qwen/Qwen3-30B-A3B", Qwen3MoeForSequenceClassification, torch.bfloat16, None),
-]:
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    generation_config = GenerationConfig.from_pretrained(model_id)
-    kwargs = {
-        "num_labels": 1,
-        "hidden_size": 16,
-        "num_attention_heads": 4,
-        "num_key_value_heads": 2,
-        "num_hidden_layers": 2,
-        "intermediate_size": 32,
-        "num_experts": 4,
-        "num_experts_per_tok": 2,
-    }
-    config = AutoConfig.from_pretrained(model_id, **kwargs)
-    model = model_class(config).to(dtype=dtype)
-    push_to_hub(model, tokenizer, generation_config, "tiny", suffix)
-
-
-# Encoder-decoder models
-for model_id, model_class, dtype, suffix in [
-    ("facebook/bart-base", BartModel, torch.float32, None),
-    ("google/flan-t5-small", T5ForConditionalGeneration, torch.float32, None),
-]:
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "facebook/bart-base" else None
-    config = AutoConfig.from_pretrained(model_id)
-    config.d_model = 24
-    model = model_class(config).to(dtype=dtype)
-    push_to_hub(model, tokenizer, generation_config, "tiny", suffix)
-
-
-# Vision Language Models
-for model_id, model_class, dtype in [
-    ("google/gemma-3-4b-it", Gemma3ForConditionalGeneration, torch.bfloat16),
-    ("google/gemma-4-E2B-it", Gemma4ForConditionalGeneration, torch.bfloat16),
-    ("google/paligemma-3b-pt-224", PaliGemmaForConditionalGeneration, torch.float32),
-    ("HuggingFaceM4/idefics2-8b", Idefics2ForConditionalGeneration, torch.float32),
-    ("HuggingFaceM4/Idefics3-8B-Llama3", Idefics3ForConditionalGeneration, torch.bfloat16),
-    ("HuggingFaceTB/SmolVLM2-2.2B-Instruct", SmolVLMForConditionalGeneration, torch.float32),
-    ("llava-hf/llava-1.5-7b-hf", LlavaForConditionalGeneration, torch.float16),
-    # Original model dtype is float16, but it triggers CUDA device side assert error (see GH-4741):
-    ("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextForConditionalGeneration, torch.bfloat16),
-    ("OpenGVLab/InternVL3-8B-hf", InternVLForConditionalGeneration, torch.bfloat16),
-    ("Qwen/Qwen2-VL-2B-Instruct", Qwen2VLForConditionalGeneration, torch.bfloat16),
-    ("Qwen/Qwen2.5-VL-3B-Instruct", Qwen2_5_VLForConditionalGeneration, torch.bfloat16),
-    ("Qwen/Qwen3-VL-2B-Instruct", Qwen3VLForConditionalGeneration, torch.bfloat16),
-    ("Qwen/Qwen3.5-0.8B", Qwen3_5ForConditionalGeneration, torch.bfloat16),
-    ("Qwen/Qwen3.6-35B-A3B", Qwen3_5MoeForConditionalGeneration, torch.bfloat16),
-]:
-    processor = AutoProcessor.from_pretrained(model_id)
-    generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "Qwen/Qwen3.5-0.8B" else None
-
-    text_config = {
-        "num_hidden_layers": 2,
-        "hidden_size": 16,
-        "num_attention_heads": 4,
-        "num_key_value_heads": 2,
-        "layer_types": None,  # Set it automatically from num_hidden_layers
-    }
-    vision_config = {
-        "num_hidden_layers": 2,
-        "hidden_size": 16,
-        "num_attention_heads": 4,
-        "num_key_value_heads": 2,
-        "embed_dim": 64,
-    }
-    kwargs = {}
-
-    if issubclass(model_class.config_class, (Qwen2VLConfig, Qwen2_5_VLConfig)):
-        text_config["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}
-        vision_config["depth"] = 2
-        # Different dict object from text_config; see GH-4101 and transformers#41020
-        kwargs["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}
-
-    if issubclass(model_class.config_class, Qwen2_5_VLConfig):
-        vision_config["out_hidden_size"] = 16
-        # Different dict object at the config root; see GH-4101 and transformers#41020
-        kwargs["num_hidden_layers"] = 2
-        kwargs["hidden_size"] = 16
-        kwargs["num_attention_heads"] = 4
-
-    if issubclass(model_class.config_class, Idefics2Config):
-        kwargs["perceiver_config"] = {"hidden_size": 16}
-
-    if issubclass(model_class.config_class, Qwen3VLConfig):
-        # So hasattr(config, "layer_types") is False
-        # See: https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L420
-        del text_config["layer_types"]
-        # "mrope_section" needs 3 elements: for dim, offset in enumerate((1, 2), start=1): mrope_section[dim]
-        # See: https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L361
-        text_config["rope_scaling"] = {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"}
-        vision_config["depth"] = 2
-        vision_config["out_hidden_size"] = 16
-
-    if issubclass(model_class.config_class, (Qwen3_5Config, Qwen3_5MoeConfig)):
-        # For tiny layer counts, default `layer_types` can end up with no full-attention layers (e.g. 2 layers and
-        # default interval 4), which breaks Qwen3.5 dynamic cache logic. Keep one full-attention layer at the end.
-        text_config["layer_types"] = ["linear_attention", "full_attention"]
-        text_config["full_attention_interval"] = 2
-        # Qwen3.5-VL vision config expects `depth`/`num_heads`, not `num_hidden_layers`/`num_attention_heads`.
-        vision_config.pop("num_hidden_layers", None)
-        vision_config.pop("num_attention_heads", None)
-        vision_config.pop("num_key_value_heads", None)
-        vision_config.pop("embed_dim", None)
-        vision_config["depth"] = 2
-        vision_config["num_heads"] = 4
-        vision_config["intermediate_size"] = 32
-        vision_config["out_hidden_size"] = 16
-
-    if issubclass(model_class.config_class, Qwen3_5MoeConfig):
-        text_config["num_experts"] = 4
-        text_config["num_experts_per_tok"] = 2
-        text_config["moe_intermediate_size"] = 32
-        text_config["shared_expert_intermediate_size"] = 32
-
-    if model_id == "llava-hf/llava-v1.6-mistral-7b-hf":
-        # Hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakesly sets text_config.dtype to "bfloat16".
-        # See https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46
-        text_config["dtype"] = None
-
-    if model_class is Gemma4ForConditionalGeneration:
-        # Gemma4 rope validation fails when passing text_config as a dict, so we mutate the config directly.
-        config = AutoConfig.from_pretrained(model_id)
-        for k, v in text_config.items():
-            setattr(config.text_config, k, v)
-        for k, v in vision_config.items():
-            setattr(config.vision_config, k, v)
-        config.text_config.layer_types = ["sliding_attention", "full_attention"]
-        config.text_config.num_kv_shared_layers = 0
-        config.text_config.global_head_dim = 8
-        config.text_config.hidden_size_per_layer_input = 16
-        config.audio_config = None
-    else:
-        config = AutoConfig.from_pretrained(model_id, text_config=text_config, vision_config=vision_config, **kwargs)
-    model = model_class(config).to(dtype=dtype)
-
-    if model_id.startswith("Qwen/Qwen3.5"):
-        # Qwen3.5 models has some weights in float32, to mirror this in the tiny model we need to convert them to float32 manually.
-        # Qwen3.6 reuses the Qwen3_5Moe class but stores those weights in bf16, so the cast is not needed there.
-        for layer in model.model.language_model.layers:
-            if hasattr(layer, "linear_attn"):  # applies to linear attention layers only
-                layer.linear_attn.A_log.data = layer.linear_attn.A_log.data.float()
-                layer.linear_attn.norm.weight.data = layer.linear_attn.norm.weight.data.float()
-
-    suffix = "3.6" if model_id == "Qwen/Qwen3.6-35B-A3B" else None
-    push_to_hub(model, processor, generation_config, "tiny", suffix)
-
-# PEFT models
-model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto")
-model = get_peft_model(model, LoraConfig())
-generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM")
-push_to_hub(model, None, None, "tiny")
-
-# Same model, but different weights
-model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto")
-model = get_peft_model(model, LoraConfig())
-generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM")
-push_to_hub(model, None, None, "tiny", "2")
diff --git a/scripts/generate_tiny_models/README.md b/scripts/generate_tiny_models/README.md
new file mode 100644
index 00000000000..644039837ac
--- /dev/null
+++ b/scripts/generate_tiny_models/README.md
@@ -0,0 +1,57 @@
+# Tiny model generation
+
+This directory contains one script per tiny model used by the TRL test suite. Each script builds a random-weight, minimally-sized model on top of a real tokenizer/processor and pushes it to the `trl-internal-testing` organization on the Hub.
+
+## Layout
+
+```
+generate_tiny_models/
+├── _common.py                    # shared helpers (push_to_hub, smoke_test, print_config_diff, ...)
+├── for_causal_lm/                # *ForCausalLM + GPT-2 LM head + small/PEFT variants
+├── for_sequence_classification/  # *ForSequenceClassification (reward models)
+└── for_conditional_generation/   # *ForConditionalGeneration (VLMs + T5 + Bart encoder-decoder)
+```
+
+## Running
+
+From the repo root, invoke a script by its module path:
+
+```bash
+python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm
+```
+
+Each script:
+
+1. Checks that the installed `transformers` version matches the one pinned in the script (fails otherwise).
+2. Builds the tiny model with random weights.
+3. Runs `smoke_test` — a minimal forward pass to catch config misspecification and NaNs.
+4. Runs `check_dtype_pattern` — reads the reference safetensors header via the Hub API and prints any tensor whose dtype diverges from the reference (catches e.g. fp32 norms kept inside a bf16 checkpoint). Mismatches are only reported; they do not abort the script.
+5. Runs `print_config_diff` — prints every flat-key difference between the reference Hub config and the tiny model's config (for debugging scale-downs).
+6. Pushes the model, tokenizer/processor, generation config, and model card to the Hub in a single commit.
+
+If the repo already exists on the Hub, the push is skipped by default. Pass `--create-pr` to open a PR against the existing repo instead:
+
+```bash
+python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm --create-pr
+```
+
+Direct overwrites of `main` aren't supported — update via `--create-pr` and merge the PR on the Hub.
+
+## Version pinning
+
+Every script declares `TRANSFORMERS_VERSION = "X.Y.Z"`, which is:
+
+```
+max(version that introduced the model, TRL's transformers floor)
+```
+
+The floor (currently `4.56.2`) is the `transformers>=` bound from `pyproject.toml`. Scripts for models introduced after the floor pin a higher version (e.g. Qwen3-VL pins `4.57.0`, Gemma4 pins `5.6.0`). The check is an exact match via `packaging.version.Version`; install the pinned version before running.
+
+**Why exact?** transformers is backward-compatible (a checkpoint saved by X loads on any ≥ X) but not forward-compatible. TRL CI runs against the floor, so tiny models must be saved with the oldest version that supports them — any newer save risks using config fields the floor can't parse. The exact-match check prevents accidental drift.
+
+## Adding a new tiny model
+
+1. Pick the right subfolder based on the model class suffix (`ForCausalLM`, `ForSequenceClassification`, `ForConditionalGeneration`).
+2. Copy an existing script with the closest shape and adapt it — reference model id, config class, model class, special kwargs.
+3. Set `TRANSFORMERS_VERSION` to the release that introduced the model (or to the TRL floor, whichever is higher).
+4. Run it and inspect the `[smoke_test]`, `[dtype_check]`, and `[config_diff]` output before letting it push.
diff --git a/scripts/generate_tiny_models/__init__.py b/scripts/generate_tiny_models/__init__.py
new file mode 100644
index 00000000000..3d26f4482fe
--- /dev/null
+++ b/scripts/generate_tiny_models/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py
new file mode 100644
index 00000000000..e9b7c5c7acc
--- /dev/null
+++ b/scripts/generate_tiny_models/_common.py
@@ -0,0 +1,265 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Shared utilities for the tiny-model generation scripts in this directory.
+# Each sibling script builds a single tiny model and pushes it to the Hub under
+# the `trl-internal-testing` organization.
+
+import argparse
+import os
+import tempfile
+
+import torch
+from huggingface_hub import CommitOperationAdd, HfApi, ModelCard
+from packaging.version import Version
+from torch import nn
+from transformers import AutoConfig, ProcessorMixin
+
+
+ORGANIZATION = "trl-internal-testing"
+
+MODEL_CARD = """
+---
+library_name: transformers
+tags: [trl]
+---
+
+# Tiny {model_class_name}
+
+This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
+"""
+
+
+api = HfApi()
+
+
+def check_transformers_version(expected_version):
+    """Raise `RuntimeError` unless the installed transformers version equals `expected_version` exactly."""
+    import transformers
+
+    if Version(transformers.__version__) != Version(expected_version):  # parsed-version compare, not string
+        raise RuntimeError(
+            f"This script requires transformers=={expected_version}, but {transformers.__version__} is installed."
+        )
+
+
+def smoke_test(model, tokenizer_or_processor=None):
+    """Run a minimal forward pass; raise `RuntimeError` if the output has NaNs or lacks logits/last_hidden_state."""
+    model.eval()
+    device = next(model.parameters()).device  # dummy inputs are placed on the model's device
+
+    if isinstance(tokenizer_or_processor, ProcessorMixin):
+        # VLM path: build a dummy (image, text) input via the processor.
+        from PIL import Image
+
+        processor = tokenizer_or_processor
+        red = Image.new("RGB", (24, 24), color="red")
+        blue = Image.new("RGB", (24, 24), color="blue")
+        messages = [
+            [
+                {
+                    "role": "user",
+                    "content": [{"type": "image", "image": red}, {"type": "text", "text": "What is this?"}],
+                }
+            ],
+            [{"role": "user", "content": [{"type": "text", "text": "Is it blue?"}, {"type": "image", "image": blue}]}],
+        ]
+        inputs = processor.apply_chat_template(
+            conversation=messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+            padding=True,
+        ).to(device)
+    else:
+        inputs = {"input_ids": torch.tensor([[1, 2, 3, 4]], device=device)}  # text-only path: fixed dummy token ids
+
+    with torch.no_grad():
+        out = model(**inputs)
+
+    if "logits" in out:
+        output_tensor = out["logits"]
+    elif "last_hidden_state" in out:
+        output_tensor = out["last_hidden_state"]
+    else:
+        raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: no logits or last_hidden_state on output")
+    if torch.isnan(output_tensor).any():
+        raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: NaN in forward output")
+    print(f"[smoke_test] {model.__class__.__name__}: OK (output shape {tuple(output_tensor.shape)})")
+
+
+def _flatten(d, prefix=""):  # nested dict -> flat dict with dot-joined keys, e.g. {"a": {"b": 1}} -> {"a.b": 1}
+    out = {}
+    for k, v in d.items():
+        key = f"{prefix}{k}" if prefix else k
+        if isinstance(v, dict):
+            out.update(_flatten(v, f"{key}."))
+        else:
+            out[key] = v
+    return out
+
+
+_DIFF_IGNORE = {"_name_or_path", "transformers_version", "architectures", "model_type", "torch_dtype", "dtype"}
+
+
+_TORCH_TO_SAFETENSORS_DTYPE = {  # torch dtype -> dtype string compared against the reference safetensors metadata
+    torch.float32: "F32",
+    torch.float16: "F16",
+    torch.bfloat16: "BF16",
+    torch.float64: "F64",
+    torch.float8_e4m3fn: "F8_E4M3",
+    torch.float8_e5m2: "F8_E5M2",
+    torch.int8: "I8",
+    torch.int16: "I16",
+    torch.int32: "I32",
+    torch.int64: "I64",
+    torch.uint8: "U8",
+    torch.bool: "BOOL",
+}
+
+
+def check_dtype_pattern(reference_id, model):
+    """Report tensors whose dtype diverges from the reference checkpoint (mismatches are printed, not raised).
+
+    Reads the reference safetensors header via the Hub API (no weight download). Useful to catch cases
+    like Qwen3.5 where specific params (e.g. linear_attn.A_log) are kept in fp32 while the rest is bf16.
+    """
+    metadata = api.get_safetensors_metadata(reference_id)
+    ref_dtypes = {name: info.dtype for fm in metadata.files_metadata.values() for name, info in fm.tensors.items()}
+
+    mismatches = []
+    for name, tensor in model.state_dict().items():
+        ref_dtype = ref_dtypes.get(name)
+        if ref_dtype is None:
+            continue  # tensor has no counterpart in the reference (e.g. scale-down, PEFT wrapper, tying)
+        tiny_dtype = _TORCH_TO_SAFETENSORS_DTYPE.get(tensor.dtype)  # None for dtypes missing from the map
+        if tiny_dtype != ref_dtype:
+            mismatches.append((name, ref_dtype, tiny_dtype))
+
+    if not mismatches:
+        print(f"[dtype_check] {reference_id}: all matched tensors have the reference dtype")
+        return
+
+    print(f"[dtype_check] {reference_id}: {len(mismatches)} tensors differ from reference:")
+    for name, ref, tiny in mismatches:
+        print(f"  {name}: reference={ref}, tiny={tiny}")
+
+
+def print_config_diff(reference_id, model):
+    """Print the flat, recursive diff between the reference Hub config and the tiny-model config."""
+    reference_config = AutoConfig.from_pretrained(reference_id)
+    ref_flat = _flatten(reference_config.to_dict())
+    tiny_flat = _flatten(model.config.to_dict())
+
+    keys = sorted(set(ref_flat) | set(tiny_flat))  # union, so keys present on only one side are listed too
+    rows = []
+    for k in keys:
+        if any(k == ig or k.endswith(f".{ig}") for ig in _DIFF_IGNORE):  # ignored at any nesting level
+            continue
+        rv, tv = ref_flat.get(k, "<missing>"), tiny_flat.get(k, "<missing>")
+        if rv != tv:
+            rows.append((k, rv, tv))
+
+    print(f"[config_diff] {reference_id} vs tiny ({len(rows)} differences)")
+    for k, r, t in rows:
+        print(f"  {k:48s} {str(r)[:34]:34s} → {str(t)[:34]}")  # values are truncated to 34 characters
+
+
+def _parse_args():
+    parser = argparse.ArgumentParser(add_help=False)  # no -h/--help option is registered on this parser
+    parser.add_argument(
+        "--create-pr",
+        action="store_true",
+        help="If the repo already exists, open a PR instead of skipping.",
+    )
+    args, _ = parser.parse_known_args()  # unrecognized command-line arguments are discarded
+    return args
+
+
+def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, create_pr=None):
+    if create_pr is None:  # not given by the caller: fall back to the --create-pr command-line flag
+        create_pr = _parse_args().create_pr
+
+    model_class_name = model.__class__.__name__
+    content = MODEL_CARD.format(model_class_name=model_class_name)
+    model_card = ModelCard(content)
+    if prefix is not None:
+        model_class_name = f"{prefix}-{model_class_name}"
+    repo_id = f"{ORGANIZATION}/{model_class_name}"  # "<org>/[<prefix>-]<ModelClass>[-<suffix>]"
+    if suffix is not None:
+        repo_id += f"-{suffix}"
+
+    exists = api.repo_exists(repo_id)
+    if exists and not create_pr:
+        print(f"Model {repo_id} already exists, skipping (pass --create-pr to open a PR)")
+        return
+
+    if not exists:
+        api.create_repo(repo_id, exist_ok=True)
+
+    # Save all artifacts to a temp dir and upload them in a single commit, so --create-pr opens one PR.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        model.save_pretrained(tmpdir)
+        if tokenizer is not None:
+            tokenizer.save_pretrained(tmpdir)
+        if generation_config is not None:
+            generation_config.save_pretrained(tmpdir)
+        model_card.save(os.path.join(tmpdir, "README.md"))
+
+        operations = [
+            CommitOperationAdd(
+                path_in_repo=os.path.relpath(os.path.join(root, name), tmpdir),
+                path_or_fileobj=os.path.join(root, name),
+            )
+            for root, _, files in os.walk(tmpdir)
+            for name in files
+        ]
+        commit_info = api.create_commit(
+            repo_id=repo_id,
+            operations=operations,
+            commit_message=f"Upload {model.__class__.__name__}",
+            create_pr=exists and create_pr,  # new repos get a direct commit; PRs only target existing repos
+        )
+        if commit_info.pr_url:
+            print(f"[push_to_hub] PR opened: {commit_info.pr_url}")
+
+
+def init_weights_tiny_model(model):
+    """
+    Initialize tiny test models in place to avoid NaNs from uninitialized weights.
+
+    Uses safe defaults (modules of any other type are left untouched):
+      - Linear/Conv1d: Xavier uniform (weights), zero (biases)
+      - Embedding: Normal(0, 0.02)
+      - LayerNorm: Ones (weights), zero (biases)
+    """
+    for module in model.modules():
+        if isinstance(module, nn.Linear):
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+            nn.init.xavier_uniform_(module.weight)
+
+        elif isinstance(module, nn.Embedding):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+        elif isinstance(module, nn.LayerNorm):
+            nn.init.ones_(module.weight)
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+
+        elif isinstance(module, nn.Conv1d):
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+            nn.init.xavier_uniform_(module.weight)
diff --git a/scripts/generate_tiny_models/for_causal_lm/__init__.py b/scripts/generate_tiny_models/for_causal_lm/__init__.py
new file mode 100644
index 00000000000..3d26f4482fe
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py
new file mode 100644
index 00000000000..6632bbb13b0
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "CohereLabs/tiny-aya-earth"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Cohere2Config(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Cohere2ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py
new file mode 100644
index 00000000000..daafb1ec187
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "CohereLabs/aya-expanse-8b"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = CohereConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = CohereForCausalLM(config).to(dtype=torch.float16)  # built from config only, then cast to float16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py
new file mode 100644
index 00000000000..fe13290ecfa
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "deepseek-ai/DeepSeek-R1"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = DeepseekV3Config(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py
new file mode 100644
index 00000000000..13db90d36f7
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Note: R1-0528 is kept in addition to R1 because it has a different chat template.
+
+import torch
+from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "deepseek-ai/DeepSeek-R1-0528"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = DeepseekV3Config(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "0528")  # NOTE(review): "0528" presumably a repo suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py
new file mode 100644
index 00000000000..77133708ac1
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "tiiuae/falcon-7b-instruct"  # NOTE(review): a Falcon (non-Mamba) repo feeds a FalconMamba model - confirm
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = FalconMambaConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,  # NOTE(review): unclear whether FalconMambaConfig uses the attention-head kwargs - confirm
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = FalconMambaForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py
new file mode 100644
index 00000000000..68935533aac
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "google/gemma-2-2b-it"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Gemma2Config(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Gemma2ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py
new file mode 100644
index 00000000000..22874adc2f9
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "google/gemma-7b-it"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = GemmaConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = GemmaForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py
new file mode 100644
index 00000000000..b0721795295
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "zai-org/GLM-4.5"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Glm4MoeConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+    n_routed_experts=4,  # expert-related sizes (going by the parameter names), also kept small
+    num_experts_per_tok=2,
+)
+model = Glm4MoeForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py
new file mode 100644
index 00000000000..8d1eb5103ea
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "openai-community/gpt2"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = GPT2Config(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # NOTE(review): GPT2Config natively uses n_embd/n_head/n_layer - confirm these aliases apply
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,  # NOTE(review): GPT-2's own name for this appears to be n_inner - confirm it is honored
+)
+model = GPT2LMHeadModel(config).to(dtype=torch.float32)  # built from config only, then cast to float32
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py
new file mode 100644
index 00000000000..080076f18c8
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "EleutherAI/pythia-14m"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = GPTNeoXConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = GPTNeoXForCausalLM(config).to(dtype=torch.float16)  # built from config only, then cast to float16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py
new file mode 100644
index 00000000000..6ae18272af7
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "openai/gpt-oss-20b"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = GptOssConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+    num_local_experts=4,  # expert-related sizes (going by the parameter names), also kept small
+    num_experts_per_tok=2,
+)
+model = GptOssForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # uploads to the Hub; "tiny" presumably prefixes the repo
diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py
new file mode 100644
index 00000000000..1622c372870
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"  # Hub repo supplying the tokenizer and generation config below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = LlamaConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = LlamaForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "3")  # NOTE(review): "3" presumably a repo-name suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py
new file mode 100644
index 00000000000..cb361901fcf
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = LlamaConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = LlamaForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "3.1")  # NOTE(review): "3.1" presumably a repo-name suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py
new file mode 100644
index 00000000000..34fda29b5f9
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # Hub repo supplying the tokenizer and generation config used below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = LlamaConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = LlamaForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "3.2")  # NOTE(review): "3.2" presumably a repo-name suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py
new file mode 100644
index 00000000000..34615475bf5
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1"  # Hub repo supplying the tokenizer and generation config below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = MistralConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = MistralForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "0.1")  # NOTE(review): "0.1" presumably a repo-name suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py
new file mode 100644
index 00000000000..aa4a9ce849a
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # version string handed to check_transformers_version on the next line
+check_transformers_version(TRANSFORMERS_VERSION)  # NOTE(review): presumably rejects other installs - confirm
+
+MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"  # Hub repo supplying the tokenizer and generation config below
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = MistralConfig(
+    vocab_size=len(tokenizer.vocab),  # sized to the reference tokenizer's vocabulary
+    hidden_size=8,  # this and the sizes below are kept very small on purpose
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = MistralForCausalLM(config).to(dtype=torch.bfloat16)  # built from config only, then cast to bfloat16
+init_weights_tiny_model(model)  # re-initializes Linear/Conv1d/Embedding/LayerNorm parameters in place
+smoke_test(model, tokenizer)  # NOTE(review): helper not shown here - presumably a quick sanity run
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): presumably compares dtypes against MODEL_ID - confirm
+print_config_diff(MODEL_ID, model)  # NOTE(review): presumably reports config differences vs MODEL_ID - confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "0.2")  # NOTE(review): "0.2" presumably a repo-name suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py
new file mode 100644
index 00000000000..cf52a1c7c93
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "facebook/opt-1.3b"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = OPTConfig(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,  # NOTE(review): not a documented OPTConfig argument; confirm it has any effect
+    num_hidden_layers=2,
+    intermediate_size=32,  # NOTE(review): OPTConfig names its feed-forward width `ffn_dim`; confirm this is applied
+)
+model = OPTForCausalLM(config).to(dtype=torch.float16)  # built from config, not pretrained; cast to float16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py
new file mode 100644
index 00000000000..7688639f873
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py
@@ -0,0 +1,29 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from peft import LoraConfig, get_peft_model
+from transformers import Qwen3ForCausalLM
+
+from .._common import check_transformers_version, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM"  # Hub repo of the base model that gets wrapped below
+
+model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto")
+model = get_peft_model(model, LoraConfig())  # wrap the base model with LoRA using LoraConfig defaults
+smoke_test(model, None)  # no tokenizer is passed
+push_to_hub(model, None, None, "tiny")  # no tokenizer / generation config is passed
diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py
new file mode 100644
index 00000000000..cf84cfd7dee
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py
@@ -0,0 +1,31 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Same model class as peft_qwen3_for_causal_lm.py, with different (random) LoRA weights.
+
+from peft import LoraConfig, get_peft_model
+from transformers import Qwen3ForCausalLM
+
+from .._common import check_transformers_version, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM"  # Hub repo of the base model that gets wrapped below
+
+model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto")
+model = get_peft_model(model, LoraConfig())  # wrap the base model with LoRA using LoraConfig defaults
+smoke_test(model, None)  # no tokenizer is passed
+push_to_hub(model, None, None, "tiny", "2")  # "2" presumably becomes the repo-name suffix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py
new file mode 100644
index 00000000000..edb13a7634b
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Phi3Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Phi3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "3")  # presumably repo prefix/suffix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py
new file mode 100644
index 00000000000..d5816214c0b
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "microsoft/Phi-3.5-mini-instruct"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Phi3Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Phi3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "3.5")  # presumably repo prefix/suffix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py
new file mode 100644
index 00000000000..4b94615fcaf
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen2Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "2.5")  # presumably repo prefix/suffix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py
new file mode 100644
index 00000000000..b7cce4494ba
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen2Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "2.5-Coder")  # presumably repo prefix/suffix (confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py
new file mode 100644
index 00000000000..00afd997aad
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-8B"  # Hub repo the tokenizer and generation config are loaded from
+# Revision pins the chat template PR with `{% generation %}` support.
+REVISION = "refs/pr/14"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=REVISION)
+config = Qwen3Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # NOTE(review): REVISION is not forwarded here; confirm that is intended
+print_config_diff(MODEL_ID, model)  # NOTE(review): REVISION is not forwarded here either
+push_to_hub(model, tokenizer, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py
new file mode 100644
index 00000000000..6f84c69005b
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Qwen3-4B-Instruct-2507 ships the non-thinking chat template, distinct from the default Qwen3 template.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen3Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "Instruct-2507")  # presumably repo prefix/suffix
diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py
new file mode 100644
index 00000000000..ccf562f3c46
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-30B-A3B"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen3MoeConfig(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+    num_experts=4,  # mixture-of-experts sizing: 4 experts in total ...
+    num_experts_per_tok=2,  # ... of which 2 are selected per token
+)
+model = Qwen3MoeForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+init_weights_tiny_model(model)  # helper from _common; presumably re-initializes the weights (TODO confirm)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py
new file mode 100644
index 00000000000..acb0ef4f465
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py
@@ -0,0 +1,42 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen2Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=128,  # 128 // 4 heads = 32, the ratio the header comment says vLLM requires
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "small", "2.5")  # presumably repo prefix/suffix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py
new file mode 100644
index 00000000000..873fcb9641f
--- /dev/null
+++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py
@@ -0,0 +1,42 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32.
+
+import torch
+from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-4B"  # Hub repo the tokenizer and generation config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Qwen3Config(
+    vocab_size=len(tokenizer.vocab),  # sized from the reference tokenizer's vocabulary
+    hidden_size=128,  # 128 // 4 heads = 32, the ratio the header comment says vLLM requires
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16)  # built from config, not pretrained; cast to bfloat16
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "small")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/__init__.py b/scripts/generate_tiny_models/for_conditional_generation/__init__.py
new file mode 100644
index 00000000000..3d26f4482fe
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/scripts/generate_tiny_models/for_conditional_generation/bart_model.py b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py
new file mode 100644
index 00000000000..c754515af72
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py
@@ -0,0 +1,33 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, BartModel
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "facebook/bart-base"  # Hub repo the tokenizer and the starting config are loaded from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(MODEL_ID)
+config.d_model = 24  # the only field overridden here; everything else stays as loaded from MODEL_ID
+model = BartModel(config).to(dtype=torch.float32)  # built from config, not pretrained; cast to float32
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, None, "tiny")  # no generation config is passed
diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py
new file mode 100644
index 00000000000..2f85c68abe8
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, Gemma3ForConditionalGeneration, GenerationConfig
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "google/gemma-3-4b-it"  # Hub repo the processor, generation config and base config are loaded from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # passed as the `text_config` keyword to AutoConfig.from_pretrained below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,  # Set it automatically from num_hidden_layers
+}
+vision_config = {  # passed as the `vision_config` keyword to AutoConfig.from_pretrained below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,  # NOTE(review): confirm the vision config actually consumes this key
+    "embed_dim": 64,  # NOTE(review): confirm the vision config actually consumes this key
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = Gemma3ForConditionalGeneration(config).to(dtype=torch.bfloat16)  # built from config, not pretrained
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py
new file mode 100644
index 00000000000..8d3cba21904
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py
@@ -0,0 +1,61 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Gemma4 rope validation fails when passing text_config as a dict through AutoConfig,
+# so the config is loaded first and then mutated in place.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, Gemma4ForConditionalGeneration, GenerationConfig
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "5.6.0"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "google/gemma-4-E2B-it"  # Hub repo the processor, generation config and base config are loaded from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides copied onto config.text_config by the setattr loop below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+}
+vision_config = {  # overrides copied onto config.vision_config by the setattr loop below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,  # NOTE(review): confirm the vision config actually consumes this attribute
+    "embed_dim": 64,  # NOTE(review): confirm the vision config actually consumes this attribute
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID)  # load the reference config first, then mutate it in place
+for k, v in text_config.items():
+    setattr(config.text_config, k, v)
+for k, v in vision_config.items():
+    setattr(config.vision_config, k, v)
+config.text_config.layer_types = ["sliding_attention", "full_attention"]  # one entry per hidden layer (2 layers)
+config.text_config.num_kv_shared_layers = 0
+config.text_config.global_head_dim = 8
+config.text_config.hidden_size_per_layer_input = 16
+config.audio_config = None  # audio sub-config is cleared before the model is built
+
+model = Gemma4ForConditionalGeneration(config).to(dtype=torch.bfloat16)  # built from config, not pretrained
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py
new file mode 100644
index 00000000000..4e6dd711aff
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py
@@ -0,0 +1,54 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics2ForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the transformers version this script targets (TODO confirm)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "HuggingFaceM4/idefics2-8b"  # Hub repo the processor, generation config and base config are loaded from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # passed as the `text_config` keyword to AutoConfig.from_pretrained below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,  # NOTE(review): presumably derived from num_hidden_layers when None (TODO confirm)
+}
+vision_config = {  # passed as the `vision_config` keyword to AutoConfig.from_pretrained below
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,  # NOTE(review): confirm the vision config actually consumes this key
+    "embed_dim": 64,  # NOTE(review): confirm the vision config actually consumes this key
+}
+
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    text_config=text_config,
+    vision_config=vision_config,
+    perceiver_config={"hidden_size": 16},  # same hidden_size value as the text and vision dicts above
+)
+model = Idefics2ForConditionalGeneration(config).to(dtype=torch.float32)  # built from config, not pretrained
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)  # helper from _common; presumably checks dtypes against MODEL_ID (TODO confirm)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # presumably the repo-name prefix (TODO confirm)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py
new file mode 100644
index 00000000000..a24909ab7d8
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics3ForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "HuggingFaceM4/Idefics3-8B-Llama3"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = Idefics3ForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py
new file mode 100644
index 00000000000..76ec81ca4ea
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, InternVLForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "OpenGVLab/InternVL3-8B-hf"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = InternVLForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py
new file mode 100644
index 00000000000..1a7ecf4b8cb
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "llava-hf/llava-1.5-7b-hf"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = LlavaForConditionalGeneration(config).to(dtype=torch.float16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py
new file mode 100644
index 00000000000..875fe0545da
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py
@@ -0,0 +1,55 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Original model dtype is float16, but it triggers CUDA device-side assert on generation (see GH-4741),
+# so this tiny model is saved in bfloat16.
+# Upstream hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakenly sets text_config.dtype to "bfloat16"
+# (see https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46), which we clear here.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaNextForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+    "dtype": None,  # hotfix for upstream text_config.dtype = "bfloat16"
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = LlavaNextForConditionalGeneration(config).to(dtype=torch.bfloat16)  # not upstream float16; see GH-4741
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py
new file mode 100644
index 00000000000..5b5fa67ec9d
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, PaliGemmaForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "google/paligemma-3b-pt-224"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = PaliGemmaForConditionalGeneration(config).to(dtype=torch.float32)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py
new file mode 100644
index 00000000000..2eab8826124
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py
@@ -0,0 +1,63 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Note: Qwen2.5-VL requires out_hidden_size on the vision config, plus root-level num_hidden_layers/hidden_size/
+# num_attention_heads (distinct from the text_config fields). See GH-4101 and transformers#41020.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2_5_VLForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+    "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"},
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+    "depth": 2,
+    "out_hidden_size": 16,  # required on the vision config for Qwen2.5-VL
+}
+
+config = AutoConfig.from_pretrained(  # reference config with the tiny overrides applied
+    MODEL_ID,
+    text_config=text_config,
+    vision_config=vision_config,
+    rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"},  # separate root-level dict
+    num_hidden_layers=2,  # root-level fields, distinct from the text_config ones
+    hidden_size=16,
+    num_attention_heads=4,
+)
+model = Qwen2_5_VLForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py
new file mode 100644
index 00000000000..b3fdf792418
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py
@@ -0,0 +1,58 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Note: rope_scaling is given as two distinct dict objects (root and text_config); see GH-4101, transformers#41020.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2VLForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+    "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"},
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+    "depth": 2,
+}
+
+config = AutoConfig.from_pretrained(  # reference config with the tiny overrides applied
+    MODEL_ID,
+    text_config=text_config,
+    vision_config=vision_config,
+    rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"},  # separate root-level dict
+)
+model = Qwen2VLForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py
new file mode 100644
index 00000000000..ce0d0dfd0e7
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py
@@ -0,0 +1,64 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Notes:
+# - Qwen3.5 auto-builds layer_types from num_hidden_layers with default interval 4, so tiny models
+#   (2 layers) end up all-linear-attention, which breaks dynamic cache. Force one full-attention layer.
+# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`).
+# - Qwen3.5 has no published generation_config on the Hub yet.
+# - Qwen3.5 keeps some linear-attn weights in float32; we cast them back after the bfloat16 conversion.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, Qwen3_5ForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "5.2.0"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3.5-0.8B"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": ["linear_attention", "full_attention"],  # force one full-attention layer
+    "full_attention_interval": 2,
+}
+vision_config = {  # overrides for the vision sub-config
+    "hidden_size": 16,
+    "depth": 2,  # vision config uses depth/num_heads naming
+    "num_heads": 4,
+    "intermediate_size": 32,
+    "out_hidden_size": 16,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = Qwen3_5ForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+
+# Restore float32 for linear-attn weights that the upstream model keeps in fp32.
+for i, layer_type in enumerate(config.text_config.layer_types):  # i also indexes the decoder layers below
+    if layer_type == "linear_attention":  # other layer types are left in bfloat16
+        linear_attn = model.model.language_model.layers[i].linear_attn
+        linear_attn.A_log.data = linear_attn.A_log.data.float()  # .float() casts to float32
+        linear_attn.norm.weight.data = linear_attn.norm.weight.data.float()
+
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, None, "tiny")  # None: no generation_config published on the Hub yet
diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py
new file mode 100644
index 00000000000..a0f5bef6e7a
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py
@@ -0,0 +1,63 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Notes:
+# - Qwen3.6 reuses the Qwen3_5Moe class with extra MoE config fields
+#   (num_experts, num_experts_per_tok, moe_intermediate_size, shared_expert_intermediate_size).
+# - Same layer_types/full_attention_interval workaround as Qwen3.5: tiny models (2 layers) need
+#   one full-attention layer to keep the dynamic cache happy.
+# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`).
+# - Unlike Qwen3.5, Qwen3.6 stores linear-attn weights in bf16, so no float32 cast is needed.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3_5MoeForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "5.2.0"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3.6-35B-A3B"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": ["linear_attention", "full_attention"],  # force one full-attention layer
+    "full_attention_interval": 2,
+    "num_experts": 4,  # this and the next three keys are the extra MoE config fields
+    "num_experts_per_tok": 2,
+    "moe_intermediate_size": 32,
+    "shared_expert_intermediate_size": 32,
+}
+vision_config = {  # overrides for the vision sub-config
+    "hidden_size": 16,
+    "depth": 2,  # vision config uses depth/num_heads naming
+    "num_heads": 4,
+    "intermediate_size": 32,
+    "out_hidden_size": 16,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = Qwen3_5MoeForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny", "3.6")  # "3.6": presumably a name suffix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py
new file mode 100644
index 00000000000..f04fb4cfadf
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py
@@ -0,0 +1,57 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Notes:
+# - "layer_types" is intentionally omitted from text_config: qwen3_vl's modeling code checks
+#   `hasattr(config, "layer_types")` and uses a different path when absent
+#   (see transformers/models/qwen3_vl/modeling_qwen3_vl.py).
+# - mrope_section needs 3 elements: `for dim, offset in enumerate((1, 2), start=1)` indexes `mrope_section[dim]`.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3VLForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.57.0"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-VL-2B-Instruct"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # text sub-config overrides; "layer_types" is left out on purpose (see file notes)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "rope_scaling": {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"},
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+    "depth": 2,
+    "out_hidden_size": 16,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = Qwen3VLForConditionalGeneration(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py
new file mode 100644
index 00000000000..b019e6d084b
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoProcessor, GenerationConfig, SmolVLMForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"  # Hub repo the tiny model is derived from
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+
+text_config = {  # overrides for the text sub-config (passed to AutoConfig.from_pretrained)
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "layer_types": None,
+}
+vision_config = {  # overrides for the vision sub-config
+    "num_hidden_layers": 2,
+    "hidden_size": 16,
+    "num_attention_heads": 4,
+    "num_key_value_heads": 2,
+    "embed_dim": 64,
+}
+
+config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config)
+model = SmolVLMForConditionalGeneration(config).to(dtype=torch.float32)  # config-only init, no pretrained weights
+smoke_test(model, processor)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, processor, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py
new file mode 100644
index 00000000000..451e7b5ecc9
--- /dev/null
+++ b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py
@@ -0,0 +1,34 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, T5ForConditionalGeneration
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "google/flan-t5-small"  # Hub repo the tiny model is derived from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)  # text-only model: a tokenizer instead of a processor
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(MODEL_ID)  # reference config, no overrides passed here
+config.d_model = 24  # the only field changed from the reference config
+model = T5ForConditionalGeneration(config).to(dtype=torch.float32)  # config-only init, no pretrained weights
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_sequence_classification/__init__.py b/scripts/generate_tiny_models/for_sequence_classification/__init__.py
new file mode 100644
index 00000000000..3d26f4482fe
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py
new file mode 100644
index 00000000000..c0e270ad050
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXForSequenceClassification
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # presumably the pinned transformers release; see check_transformers_version
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "EleutherAI/pythia-14m"  # Hub repo the tiny model is derived from
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(  # reference config with the tiny overrides applied
+    MODEL_ID,
+    num_labels=1,  # one label for the classification head
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = GPTNeoXForSequenceClassification(config).to(dtype=torch.bfloat16)  # config-only init, no pretrained weights
+init_weights_tiny_model(model)  # helper from _common; its exact behavior is not visible in this file
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")  # "tiny": presumably a repo-name prefix (see _common)
diff --git a/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py
new file mode 100644
index 00000000000..e082ed94656
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaForSequenceClassification
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # transformers version handed to the version check below
+check_transformers_version(TRANSFORMERS_VERSION)  # helper from .._common; presumably enforces the version; confirm
+
+MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # reference checkpoint for tokenizer, generation config and config
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,  # start from the reference config; the keyword arguments below override it with tiny values
+    num_labels=1,  # classification head with a single output
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,  # fewer key/value heads than attention heads (4 vs 2)
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = LlamaForSequenceClassification(config).to(dtype=torch.bfloat16)  # built from config only, cast to bf16
+init_weights_tiny_model(model)  # helper from .._common; presumably re-initialises the weights; confirm
+smoke_test(model, tokenizer)  # helper from .._common; presumably a quick sanity run; confirm
+check_dtype_pattern(MODEL_ID, model)  # helper from .._common; presumably compares dtypes with MODEL_ID; confirm
+print_config_diff(MODEL_ID, model)  # helper from .._common; presumably prints config changes vs MODEL_ID; confirm
+push_to_hub(model, tokenizer, generation_config, "tiny", "3.2")  # "3.2": presumably a name suffix; confirm
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py
new file mode 100644
index 00000000000..6b83cf4204f
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py
@@ -0,0 +1,51 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2ForSequenceClassification
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    num_labels=1,
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+# Bug in transformers: it ignores num_hidden_layers to build layer_types
+config.layer_types = config.layer_types[:2]
+model = Qwen2ForSequenceClassification(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "2.5")
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py
new file mode 100644
index 00000000000..fa05dcc1105
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py
@@ -0,0 +1,51 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3ForSequenceClassification
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+
+TRANSFORMERS_VERSION = "4.56.2"
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-4B"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    num_labels=1,
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+# Bug in transformers: it ignores num_hidden_layers to build layer_types
+config.layer_types = config.layer_types[:2]
+model = Qwen3ForSequenceClassification(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py
new file mode 100644
index 00000000000..b89842afbaa
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py
@@ -0,0 +1,43 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeForSequenceClassification
+
+from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test
+
+
+TRANSFORMERS_VERSION = "4.56.2"  # transformers version handed to the version check below
+check_transformers_version(TRANSFORMERS_VERSION)  # helper from .._common; presumably enforces the version; confirm
+
+MODEL_ID = "Qwen/Qwen3-30B-A3B"  # reference checkpoint for tokenizer, generation config and base config
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,  # start from the reference config; the keyword arguments below override it with tiny values
+    num_labels=1,  # classification head with a single output
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,  # fewer key/value heads than attention heads (4 vs 2)
+    num_hidden_layers=2,
+    intermediate_size=32,
+    num_experts=4,  # shrink the mixture-of-experts layers to 4 experts
+    num_experts_per_tok=2,  # experts used per token
+)
+model = Qwen3MoeForSequenceClassification(config).to(dtype=torch.bfloat16)  # built from config only, cast to bf16
+smoke_test(model, tokenizer)  # NOTE(review): init_weights_tiny_model is not called in this script; confirm intent
+check_dtype_pattern(MODEL_ID, model)  # helper from .._common; presumably compares dtypes with MODEL_ID; confirm
+print_config_diff(MODEL_ID, model)  # helper from .._common; presumably prints config changes vs MODEL_ID; confirm
+push_to_hub(model, tokenizer, generation_config, "tiny")  # "tiny": presumably a repo-name prefix; confirm