From 730c87629a91a705e0177877c907d1d9e9249cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 18:30:27 +0000 Subject: [PATCH 01/20] New tiny model generation --- scripts/generate_tiny_models.py | 437 ------------------ scripts/generate_tiny_models/README.md | 50 ++ scripts/generate_tiny_models/__init__.py | 0 scripts/generate_tiny_models/_common.py | 253 ++++++++++ .../for_causal_lm/__init__.py | 0 .../for_causal_lm/cohere2_for_causal_lm.py | 40 ++ .../for_causal_lm/cohere_for_causal_lm.py | 40 ++ .../deepseek_v3_for_causal_lm.py | 40 ++ .../deepseek_v3_for_causal_lm_0528.py | 42 ++ .../falcon_mamba_for_causal_lm.py | 40 ++ .../for_causal_lm/gemma2_for_causal_lm.py | 40 ++ .../for_causal_lm/gemma_for_causal_lm.py | 40 ++ .../for_causal_lm/glm4_moe_for_causal_lm.py | 42 ++ .../for_causal_lm/gpt2_lm_head_model.py | 40 ++ .../for_causal_lm/gpt_neox_for_causal_lm.py | 40 ++ .../for_causal_lm/gpt_oss_for_causal_lm.py | 42 ++ .../for_causal_lm/llama_for_causal_lm_3.py | 40 ++ .../for_causal_lm/llama_for_causal_lm_3_1.py | 40 ++ .../for_causal_lm/llama_for_causal_lm_3_2.py | 40 ++ .../mistral_for_causal_lm_0_1.py | 40 ++ .../mistral_for_causal_lm_0_2.py | 40 ++ .../for_causal_lm/opt_for_causal_lm.py | 40 ++ .../for_causal_lm/peft_qwen3_for_causal_lm.py | 29 ++ .../peft_qwen3_for_causal_lm_2.py | 31 ++ .../for_causal_lm/phi3_for_causal_lm_3.py | 40 ++ .../for_causal_lm/phi3_for_causal_lm_3_5.py | 40 ++ .../for_causal_lm/qwen2_for_causal_lm_2_5.py | 40 ++ .../qwen2_for_causal_lm_2_5_coder.py | 40 ++ .../for_causal_lm/qwen3_for_causal_lm.py | 42 ++ .../for_causal_lm/qwen3_moe_for_causal_lm.py | 42 ++ .../small_qwen2_for_causal_lm_2_5.py | 41 ++ .../small_qwen3_for_causal_lm.py | 41 ++ .../for_conditional_generation/__init__.py | 0 .../for_conditional_generation/bart_model.py | 32 ++ .../gemma3_for_conditional_generation.py | 48 ++ .../gemma4_for_conditional_generation.py | 60 +++ .../idefics2_for_conditional_generation.py | 53 +++ .../idefics3_for_conditional_generation.py | 48 ++ .../internvl_for_conditional_generation.py | 48 ++ .../llava_for_conditional_generation.py | 48 ++ .../llava_next_for_conditional_generation.py | 54 +++ .../paligemma_for_conditional_generation.py | 48 ++ .../qwen2_5_vl_for_conditional_generation.py | 62 +++ .../qwen2_vl_for_conditional_generation.py | 57 +++ .../qwen3_5_for_conditional_generation.py | 62 +++ .../qwen3_vl_for_conditional_generation.py | 56 +++ .../smolvlm_for_conditional_generation.py | 48 ++ .../t5_for_conditional_generation.py | 33 ++ .../for_sequence_classification/__init__.py | 0 .../gpt_neox_for_sequence_classification.py | 41 ++ .../llama_for_sequence_classification_3_2.py | 41 ++ .../qwen2_for_sequence_classification_2_5.py | 43 ++ .../qwen3_for_sequence_classification.py | 43 ++ .../qwen3_moe_for_sequence_classification.py | 42 ++ tests/conftest.py | 1 + 55 files changed, 2343 insertions(+), 437 deletions(-) delete mode 100644 scripts/generate_tiny_models.py create mode 100644 scripts/generate_tiny_models/README.md create mode 100644 scripts/generate_tiny_models/__init__.py create mode 100644 scripts/generate_tiny_models/_common.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/__init__.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py create mode 100644 
scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py create mode 100644 scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/__init__.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/bart_model.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py create 
mode 100644 scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/__init__.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py create mode 100644 scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py diff --git a/scripts/generate_tiny_models.py b/scripts/generate_tiny_models.py deleted file mode 100644 index 193dee9e080..00000000000 --- a/scripts/generate_tiny_models.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright 2020-2026 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the -# `trl-internal-testing` organization. -# This script is meant to be run when adding new tiny model to the TRL library. 
- -import torch -from huggingface_hub import HfApi, ModelCard -from peft import LoraConfig, get_peft_model -from torch import nn -from transformers import ( - AutoConfig, - AutoProcessor, - AutoTokenizer, - BartModel, - Cohere2Config, - Cohere2ForCausalLM, - CohereConfig, - CohereForCausalLM, - DeepseekV3Config, - DeepseekV3ForCausalLM, - FalconMambaConfig, - FalconMambaForCausalLM, - Gemma2Config, - Gemma2ForCausalLM, - Gemma3ForConditionalGeneration, - Gemma4ForConditionalGeneration, - GemmaConfig, - GemmaForCausalLM, - GenerationConfig, - Glm4MoeConfig, - Glm4MoeForCausalLM, - GPT2Config, - GPT2LMHeadModel, - GPTNeoXConfig, - GPTNeoXForCausalLM, - GPTNeoXForSequenceClassification, - GptOssConfig, - GptOssForCausalLM, - Idefics2Config, - Idefics2ForConditionalGeneration, - Idefics3ForConditionalGeneration, - InternVLForConditionalGeneration, - LlamaConfig, - LlamaForCausalLM, - LlamaForSequenceClassification, - LlavaForConditionalGeneration, - LlavaNextForConditionalGeneration, - MistralConfig, - MistralForCausalLM, - OPTConfig, - OPTForCausalLM, - PaliGemmaForConditionalGeneration, - Phi3Config, - Phi3ForCausalLM, - Qwen2_5_VLConfig, - Qwen2_5_VLForConditionalGeneration, - Qwen2Config, - Qwen2ForCausalLM, - Qwen2ForSequenceClassification, - Qwen2VLConfig, - Qwen2VLForConditionalGeneration, - Qwen3_5Config, - Qwen3_5ForConditionalGeneration, - Qwen3Config, - Qwen3ForCausalLM, - Qwen3ForSequenceClassification, - Qwen3MoeConfig, - Qwen3MoeForCausalLM, - Qwen3MoeForSequenceClassification, - Qwen3VLConfig, - Qwen3VLForConditionalGeneration, - SmolVLMForConditionalGeneration, - T5ForConditionalGeneration, -) - - -ORGANIZATION = "trl-internal-testing" - -MODEL_CARD = """ ---- -library_name: transformers -tags: [trl] ---- - -# Tiny {model_class_name} - -This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library. -""" - - -api = HfApi() - - -def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, force=False): - model_class_name = model.__class__.__name__ - content = MODEL_CARD.format(model_class_name=model_class_name) - model_card = ModelCard(content) - if prefix is not None: - model_class_name = f"{prefix}-{model_class_name}" - repo_id = f"{ORGANIZATION}/{model_class_name}" - if suffix is not None: - repo_id += f"-{suffix}" - - if api.repo_exists(repo_id) and not force: - print(f"Model {repo_id} already exists, skipping") - else: - model.push_to_hub(repo_id) - model_card.push_to_hub(repo_id) - if tokenizer is not None: - tokenizer.push_to_hub(repo_id) - if generation_config is not None: - generation_config.push_to_hub(repo_id) - - -def init_weights_tiny_model(model): - """ - Initialize tiny test models to avoid NaNs from uninitialized weights. 
- - Uses safe defaults: - - Linear/Conv1d: Xavier uniform (weights), zero (biases) - - Embedding: Normal(0, 0.02) - - LayerNorm: Ones (weights), zero (biases) - - Args: - model: PyTorch model (modified in-place) - """ - for module in model.modules(): - if isinstance(module, nn.Linear): - # Attention/MLP projections → Xavier or Normal - if module.bias is not None: - nn.init.zeros_(module.bias) - nn.init.xavier_uniform_(module.weight) - - elif isinstance(module, nn.Embedding): - # Token embeddings → GPT-style Normal - nn.init.normal_(module.weight, mean=0.0, std=0.02) - - elif isinstance(module, nn.LayerNorm): - # LayerNorm weights always 1, bias 0 - nn.init.ones_(module.weight) - if module.bias is not None: - nn.init.zeros_(module.bias) - - elif isinstance(module, nn.Conv1d): - # Convolutional layers → Xavier or Normal - if module.bias is not None: - nn.init.zeros_(module.bias) - nn.init.xavier_uniform_(module.weight) - - -# Decoder models -for model_id, config_class, model_class, dtype, suffix in [ - # ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None), # loading fails with this model, see https://huggingface.co/bigscience/bloomz-560m/discussions/14 - ("CohereLabs/aya-expanse-8b", CohereConfig, CohereForCausalLM, torch.float16, None), - ("CohereLabs/tiny-aya-earth", Cohere2Config, Cohere2ForCausalLM, torch.bfloat16, None), - ("deepseek-ai/DeepSeek-R1", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, None), - # It's important to have R1-0528 as it doesn't have the same chat template - ("deepseek-ai/DeepSeek-R1-0528", DeepseekV3Config, DeepseekV3ForCausalLM, torch.bfloat16, "0528"), - ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, torch.bfloat16, None), - ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, torch.bfloat16, None), - ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, torch.bfloat16, None), - ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, torch.float32, None), - ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, torch.float16, None), - ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3"), - ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.1"), - ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, torch.bfloat16, "3.2"), - ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.1"), - ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, torch.bfloat16, "0.2"), - ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, torch.float16, None), - ("microsoft/Phi-3-mini-4k-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3"), - ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, torch.bfloat16, "3.5"), - ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5"), - ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, torch.bfloat16, "2.5-Coder"), - ("Qwen/Qwen3-8B", Qwen3Config, Qwen3ForCausalLM, torch.bfloat16, None), -]: - revision = "refs/pr/14" if model_id == "Qwen/Qwen3-8B" else "main" # chat template with {% generation %} - tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision) - generation_config = GenerationConfig.from_pretrained(model_id, revision=revision) - config = config_class( - vocab_size=len(tokenizer.vocab), - hidden_size=8, - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, - ) - model = model_class(config).to(dtype=dtype) - 
init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# MoE models -for model_id, config_class, model_class, dtype, suffix in [ - ("Qwen/Qwen3-30B-A3B", Qwen3MoeConfig, Qwen3MoeForCausalLM, torch.bfloat16, None), - ("openai/gpt-oss-20b", GptOssConfig, GptOssForCausalLM, torch.bfloat16, None), - ("zai-org/GLM-4.5", Glm4MoeConfig, Glm4MoeForCausalLM, torch.bfloat16, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = {} - if model_id == "zai-org/GLM-4.5": - kwargs["n_routed_experts"] = 4 - elif model_id == "Qwen/Qwen3-30B-A3B": - kwargs["num_experts"] = 4 - elif model_id == "openai/gpt-oss-20b": - kwargs["num_local_experts"] = 4 - - config = config_class( - vocab_size=len(tokenizer.vocab), - hidden_size=8, - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, - num_experts_per_tok=2, - **kwargs, - ) - model = model_class(config).to(dtype=dtype) - init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# Two slightly bigger models, required for vLLM testing -tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-32B-Instruct") -generation_config = GenerationConfig.from_pretrained("Qwen/Qwen2.5-32B-Instruct") -config = Qwen2Config( - vocab_size=len(tokenizer.vocab), - hidden_size=128, # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, -) -model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16) -push_to_hub(model, tokenizer, generation_config, "small", "2.5") - -tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B") -generation_config = GenerationConfig.from_pretrained("Qwen/Qwen3-4B") -config = Qwen3Config( - vocab_size=len(tokenizer.vocab), - hidden_size=128, # increase hidden size so that hidden_size // num_attention_heads = 32, required for vLLM - num_attention_heads=4, - num_key_value_heads=2, - num_hidden_layers=2, - intermediate_size=32, -) -model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16) -push_to_hub(model, tokenizer, generation_config, "small") - -# Reward models -for model_id, model_class, dtype, suffix in [ - ("EleutherAI/pythia-14m", GPTNeoXForSequenceClassification, torch.bfloat16, None), - ("meta-llama/Llama-3.2-1B-Instruct", LlamaForSequenceClassification, torch.bfloat16, "3.2"), - ("Qwen/Qwen2.5-32B-Instruct", Qwen2ForSequenceClassification, torch.bfloat16, "2.5"), - ("Qwen/Qwen3-4B", Qwen3ForSequenceClassification, torch.bfloat16, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = { - "num_labels": 1, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "num_hidden_layers": 2, - "intermediate_size": 32, - } - config = AutoConfig.from_pretrained(model_id, **kwargs) - # Bug in transformers: it ignores num_hidden_layers to build layer_types - if model_id in ("Qwen/Qwen2.5-32B-Instruct", "Qwen/Qwen3-4B"): - config.layer_types = config.layer_types[:2] - model = model_class(config).to(dtype=dtype) - init_weights_tiny_model(model) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - -# MoE Reward models -for model_id, model_class, dtype, suffix in [ - ("Qwen/Qwen3-30B-A3B", Qwen3MoeForSequenceClassification, torch.bfloat16, None), -]: - tokenizer = 
AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) - kwargs = { - "num_labels": 1, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "num_hidden_layers": 2, - "intermediate_size": 32, - "num_experts": 4, - "num_experts_per_tok": 2, - } - config = AutoConfig.from_pretrained(model_id, **kwargs) - model = model_class(config).to(dtype=dtype) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - - -# Encoder-decoder models -for model_id, model_class, dtype, suffix in [ - ("facebook/bart-base", BartModel, torch.float32, None), - ("google/flan-t5-small", T5ForConditionalGeneration, torch.float32, None), -]: - tokenizer = AutoTokenizer.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "facebook/bart-base" else None - config = AutoConfig.from_pretrained(model_id) - config.d_model = 24 - model = model_class(config).to(dtype=dtype) - push_to_hub(model, tokenizer, generation_config, "tiny", suffix) - - -# Vision Language Models -for model_id, model_class, dtype in [ - ("google/gemma-3-4b-it", Gemma3ForConditionalGeneration, torch.bfloat16), - ("google/gemma-4-E2B-it", Gemma4ForConditionalGeneration, torch.bfloat16), - ("google/paligemma-3b-pt-224", PaliGemmaForConditionalGeneration, torch.float32), - ("HuggingFaceM4/idefics2-8b", Idefics2ForConditionalGeneration, torch.float32), - ("HuggingFaceM4/Idefics3-8B-Llama3", Idefics3ForConditionalGeneration, torch.bfloat16), - ("HuggingFaceTB/SmolVLM2-2.2B-Instruct", SmolVLMForConditionalGeneration, torch.float32), - ("llava-hf/llava-1.5-7b-hf", LlavaForConditionalGeneration, torch.float16), - # Original model dtype is float16, but it triggers CUDA device side assert error (see GH-4741): - ("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextForConditionalGeneration, torch.bfloat16), - ("OpenGVLab/InternVL3-8B-hf", InternVLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen2-VL-2B-Instruct", Qwen2VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen2.5-VL-3B-Instruct", Qwen2_5_VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen3-VL-2B-Instruct", Qwen3VLForConditionalGeneration, torch.bfloat16), - ("Qwen/Qwen3.5-0.8B", Qwen3_5ForConditionalGeneration, torch.bfloat16), -]: - processor = AutoProcessor.from_pretrained(model_id) - generation_config = GenerationConfig.from_pretrained(model_id) if model_id != "Qwen/Qwen3.5-0.8B" else None - - text_config = { - "num_hidden_layers": 2, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "layer_types": None, # Set it automatically from num_hidden_layers - } - vision_config = { - "num_hidden_layers": 2, - "hidden_size": 16, - "num_attention_heads": 4, - "num_key_value_heads": 2, - "embed_dim": 64, - } - kwargs = {} - - if issubclass(model_class.config_class, (Qwen2VLConfig, Qwen2_5_VLConfig)): - text_config["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"} - vision_config["depth"] = 2 - # Different dict object from text_config; see GH-4101 and transformers#41020 - kwargs["rope_scaling"] = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"} - - if issubclass(model_class.config_class, Qwen2_5_VLConfig): - vision_config["out_hidden_size"] = 16 - # Different dict object at the config root; see GH-4101 and transformers#41020 - kwargs["num_hidden_layers"] = 2 - kwargs["hidden_size"] = 16 - kwargs["num_attention_heads"] = 4 - - if issubclass(model_class.config_class, Idefics2Config): - 
kwargs["perceiver_config"] = {"hidden_size": 16} - - if issubclass(model_class.config_class, Qwen3VLConfig): - # So hasattr(config, "layer_types") is False - # See: https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L420 - del text_config["layer_types"] - # "mrope_section" needs 3 elements: for dim, offset in enumerate((1, 2), start=1): mrope_section[dim] - # See: https://github.com/huggingface/transformers/blob/fe5ca9ddaa07fac2872407e75c7a7661216ac956/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L361 - text_config["rope_scaling"] = {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"} - vision_config["depth"] = 2 - vision_config["out_hidden_size"] = 16 - - if issubclass(model_class.config_class, Qwen3_5Config): - # For tiny layer counts, default `layer_types` can end up with no full-attention layers (e.g. 2 layers and - # default interval 4), which breaks Qwen3.5 dynamic cache logic. Keep one full-attention layer at the end. - text_config["layer_types"] = ["linear_attention", "full_attention"] - text_config["full_attention_interval"] = 2 - # Qwen3.5-VL vision config expects `depth`/`num_heads`, not `num_hidden_layers`/`num_attention_heads`. - vision_config.pop("num_hidden_layers", None) - vision_config.pop("num_attention_heads", None) - vision_config.pop("num_key_value_heads", None) - vision_config.pop("embed_dim", None) - vision_config["depth"] = 2 - vision_config["num_heads"] = 4 - vision_config["intermediate_size"] = 32 - vision_config["out_hidden_size"] = 16 - - if model_id == "llava-hf/llava-v1.6-mistral-7b-hf": - # Hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakesly sets text_config.dtype to "bfloat16". - # See https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46 - text_config["dtype"] = None - - if model_class is Gemma4ForConditionalGeneration: - # Gemma4 rope validation fails when passing text_config as a dict, so we mutate the config directly. - config = AutoConfig.from_pretrained(model_id) - for k, v in text_config.items(): - setattr(config.text_config, k, v) - for k, v in vision_config.items(): - setattr(config.vision_config, k, v) - config.text_config.layer_types = ["sliding_attention", "full_attention"] - config.text_config.num_kv_shared_layers = 0 - config.text_config.global_head_dim = 8 - config.text_config.hidden_size_per_layer_input = 16 - config.audio_config = None - else: - config = AutoConfig.from_pretrained(model_id, text_config=text_config, vision_config=vision_config, **kwargs) - model = model_class(config).to(dtype=dtype) - - if issubclass(model_class.config_class, Qwen3_5Config): - # Qwen3.5 models has some weights in float32, to mirror this in the tiny model we need to convert them to float32 manually. 
-        for layer in model.model.language_model.layers:
-            if hasattr(layer, "linear_attn"):  # applies to linear attention layers only
-                layer.linear_attn.A_log.data = layer.linear_attn.A_log.data.float()
-                layer.linear_attn.norm.weight.data = layer.linear_attn.norm.weight.data.float()
-
-    push_to_hub(model, processor, generation_config, "tiny")
-
-# PEFT models
-model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto")
-model = get_peft_model(model, LoraConfig())
-generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM")
-push_to_hub(model, None, None, "tiny")
-
-# Same model, but different weights
-model = Qwen3ForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM", dtype="auto")
-model = get_peft_model(model, LoraConfig())
-generation_config = GenerationConfig.from_pretrained("trl-internal-testing/tiny-Qwen3ForCausalLM")
-push_to_hub(model, None, None, "tiny", "2")
diff --git a/scripts/generate_tiny_models/README.md b/scripts/generate_tiny_models/README.md
new file mode 100644
index 00000000000..14d96a793c7
--- /dev/null
+++ b/scripts/generate_tiny_models/README.md
@@ -0,0 +1,50 @@
+# Tiny model generation
+
+This directory contains one script per tiny model used by the TRL test suite. Each script builds a random-weight, minimally sized model on top of a real tokenizer/processor and pushes it to the `trl-internal-testing` organization on the Hub.
+
+## Layout
+
+```
+generate_tiny_models/
+├── _common.py                    # shared helpers (push_to_hub, smoke_test, print_config_diff, ...)
+├── for_causal_lm/                # *ForCausalLM + GPT-2 LM head + small/PEFT variants
+├── for_sequence_classification/  # *ForSequenceClassification (reward models)
+└── for_conditional_generation/   # *ForConditionalGeneration (VLMs + T5 + Bart encoder-decoder)
+```
+
+## Running
+
+From the repo root, invoke a script by its module path:
+
+```bash
+python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm
+```
+
+Each script:
+
+1. Checks that the installed `transformers` version matches the one pinned in the script (fails otherwise).
+2. Builds the tiny model with random weights.
+3. Runs `smoke_test` — a minimal forward pass to catch config misspecification and NaNs.
+4. Runs `check_dtype_pattern`, which flags tensors whose dtype diverges from the reference checkpoint (read from the safetensors header, no weight download).
+5. Runs `print_config_diff` — prints every flat-key difference between the reference Hub config and the tiny model's config (for debugging scale-downs).
+6. Pushes the model, tokenizer/processor, generation config, and model card to the Hub.
+
+If the repo already exists on the Hub, the push is skipped (pass `force=True` in `push_to_hub(...)` to overwrite, or run the script with `--create-pr` to open a PR instead).
+
+## Version pinning
+
+Every script declares `TRANSFORMERS_VERSION = "X.Y.Z"`, which is:
+
+```
+max(version that introduced the model, TRL's transformers floor)
+```
+
+The floor (currently `4.56.2`) is the `transformers>=` bound from `pyproject.toml`. Scripts for models introduced after the floor pin a higher version (e.g. Qwen3-VL pins `4.57.0`, Gemma4 pins `5.6.0`). The check is an exact match via `packaging.version.Version`; install the pinned version before running.
+
+**Why exact?** transformers is backward-compatible (a checkpoint saved by X loads on any ≥ X) but not forward-compatible. TRL CI runs against the floor, so tiny models must be saved with the oldest version that supports them — any newer save risks using config fields the floor can't parse. The exact-match check prevents accidental drift.
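+
+## Script skeleton
+
+Every generator follows the same shape. A minimal sketch, mirroring `for_causal_lm/cohere2_for_causal_lm.py` (license header omitted); swap in your model's reference id, config class, scale-down values, and dtype:
+
+```python
+import torch
+from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig
+
+from .._common import (
+    check_dtype_pattern,
+    check_transformers_version,
+    init_weights_tiny_model,
+    print_config_diff,
+    push_to_hub,
+    smoke_test,
+)
+
+TRANSFORMERS_VERSION = "4.56.2"  # max(version that introduced the model, TRL floor)
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "CohereLabs/tiny-aya-earth"  # reference checkpoint: real tokenizer, chat template, config
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = Cohere2Config(
+    vocab_size=len(tokenizer.vocab),  # keep the full vocab so the real tokenizer stays usable
+    hidden_size=8,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = Cohere2ForCausalLM(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)        # avoid NaNs from uninitialized weights
+smoke_test(model, tokenizer)          # minimal forward pass
+check_dtype_pattern(MODEL_ID, model)  # per-tensor dtype vs the reference safetensors header
+print_config_diff(MODEL_ID, model)    # flat-key diff vs the reference config
+push_to_hub(model, tokenizer, generation_config, "tiny")
+```
+
+## Adding a new tiny model
+
+1.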
Pick the right subfolder based on the model class suffix (`ForCausalLM`, `ForSequenceClassification`, `ForConditionalGeneration`). +2. Copy an existing script with the closest shape and adapt it — reference model id, config class, model class, special kwargs. +3. Set `TRANSFORMERS_VERSION` to the release that introduced the model (or to the TRL floor, whichever is higher). +4. Run it. Inspect the `[smoke_test]` and `[config_diff]` output before letting it push. diff --git a/scripts/generate_tiny_models/__init__.py b/scripts/generate_tiny_models/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py new file mode 100644 index 00000000000..1f713853f59 --- /dev/null +++ b/scripts/generate_tiny_models/_common.py @@ -0,0 +1,253 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Shared utilities for the tiny-model generation scripts in this directory. +# Each sibling script builds a single tiny model and pushes it to the Hub under +# the `trl-internal-testing` organization. + +import argparse +import os +import tempfile + +import torch +from huggingface_hub import CommitOperationAdd, HfApi, ModelCard +from packaging.version import Version +from torch import nn +from transformers import AutoConfig + + +ORGANIZATION = "trl-internal-testing" + +MODEL_CARD = """ +--- +library_name: transformers +tags: [trl] +--- + +# Tiny {model_class_name} + +This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library. +""" + + +api = HfApi() + + +def check_transformers_version(expected_version): + """Raise unless the installed transformers matches `expected_version` exactly.""" + import transformers + + if Version(transformers.__version__) != Version(expected_version): + raise RuntimeError( + f"This script requires transformers=={expected_version}, " f"but {transformers.__version__} is installed." + ) + + +def smoke_test(model, tokenizer_or_processor=None): + """Run a minimal forward pass to sanity-check the tiny model doesn't crash or produce NaNs.""" + model.eval() + device = next(model.parameters()).device + + if tokenizer_or_processor is not None and hasattr(tokenizer_or_processor, "image_processor"): + # VLM path: build a dummy (image, text) input via the processor. 
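+        # Two single-turn conversations, with the image placed before the text in one and after it in the
+        # other, exercise batch padding and image-token splicing in both positions.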
+ from PIL import Image + + processor = tokenizer_or_processor + red = Image.new("RGB", (24, 24), color="red") + blue = Image.new("RGB", (24, 24), color="blue") + messages = [ + [{"role": "user", "content": [{"type": "image", "image": red}, {"type": "text", "text": "What is this?"}]}], + [{"role": "user", "content": [{"type": "text", "text": "Is it blue?"}, {"type": "image", "image": blue}]}], + ] + inputs = processor.apply_chat_template( + conversation=messages, + add_generation_prompt=True, + tokenize=True, + return_dict=True, + return_tensors="pt", + padding=True, + ).to(device) + else: + inputs = {"input_ids": torch.tensor([[1, 2, 3, 4]], device=device)} + + with torch.no_grad(): + out = model(**inputs) + + logits = getattr(out, "logits", None) + if logits is None: + logits = getattr(out, "last_hidden_state", None) + if logits is None: + raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: no logits or last_hidden_state on output") + if torch.isnan(logits).any(): + raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: NaN in forward output") + print(f"[smoke_test] {model.__class__.__name__}: OK (output shape {tuple(logits.shape)})") + + +def _flatten(d, prefix=""): + out = {} + for k, v in d.items(): + key = f"{prefix}{k}" if prefix else k + if isinstance(v, dict): + out.update(_flatten(v, f"{key}.")) + else: + out[key] = v + return out + + +_DIFF_IGNORE = {"_name_or_path", "transformers_version", "architectures", "model_type", "torch_dtype", "dtype"} + + +_TORCH_TO_SAFETENSORS_DTYPE = { + torch.float32: "F32", + torch.float16: "F16", + torch.bfloat16: "BF16", + torch.float64: "F64", + torch.int8: "I8", + torch.int16: "I16", + torch.int32: "I32", + torch.int64: "I64", + torch.uint8: "U8", + torch.bool: "BOOL", +} + + +def check_dtype_pattern(reference_id, model): + """Flag tensors whose dtype diverges from the reference checkpoint. + + Reads the reference safetensors header via the Hub API (no weight download). Useful to catch cases + like Qwen3.5 where specific params (e.g. linear_attn.A_log) are kept in fp32 while the rest is bf16. + """ + metadata = api.get_safetensors_metadata(reference_id) + ref_dtypes = {name: info.dtype for fm in metadata.files_metadata.values() for name, info in fm.tensors.items()} + + mismatches = [] + for name, tensor in model.state_dict().items(): + ref_dtype = ref_dtypes.get(name) + if ref_dtype is None: + continue # tensor has no counterpart in the reference (e.g. 
scale-down, PEFT wrapper, tying) + tiny_dtype = _TORCH_TO_SAFETENSORS_DTYPE.get(tensor.dtype) + if tiny_dtype != ref_dtype: + mismatches.append((name, ref_dtype, tiny_dtype)) + + if not mismatches: + print(f"[dtype_check] {reference_id}: all matched tensors have the reference dtype") + return + + print(f"[dtype_check] {reference_id}: {len(mismatches)} tensors differ from reference:") + for name, ref, tiny in mismatches: + print(f" {name}: reference={ref}, tiny={tiny}") + + +def print_config_diff(reference_id, model): + """Print the flat, recursive diff between the reference Hub config and the tiny-model config.""" + reference_config = AutoConfig.from_pretrained(reference_id) + ref_flat = _flatten(reference_config.to_dict()) + tiny_flat = _flatten(model.config.to_dict()) + + keys = sorted(set(ref_flat) | set(tiny_flat)) + rows = [] + for k in keys: + if any(k == ig or k.endswith(f".{ig}") for ig in _DIFF_IGNORE): + continue + rv, tv = ref_flat.get(k, ""), tiny_flat.get(k, "") + if rv != tv: + rows.append((k, rv, tv)) + + print(f"[config_diff] {reference_id} vs tiny ({len(rows)} differences)") + for k, r, t in rows: + print(f" {k:48s} {str(r)[:34]:34s} → {str(t)[:34]}") + + +def _parse_args(): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument( + "--create-pr", + action="store_true", + help="If the repo already exists, open a PR instead of skipping.", + ) + args, _ = parser.parse_known_args() + return args + + +def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, force=False, create_pr=None): + if create_pr is None: + create_pr = _parse_args().create_pr + + model_class_name = model.__class__.__name__ + content = MODEL_CARD.format(model_class_name=model_class_name) + model_card = ModelCard(content) + if prefix is not None: + model_class_name = f"{prefix}-{model_class_name}" + repo_id = f"{ORGANIZATION}/{model_class_name}" + if suffix is not None: + repo_id += f"-{suffix}" + + exists = api.repo_exists(repo_id) + if exists and not force and not create_pr: + print(f"Model {repo_id} already exists, skipping (pass --create-pr to open a PR)") + return + + if not exists: + api.create_repo(repo_id, exist_ok=True) + + # Save all artifacts to a temp dir and upload them in a single commit, so --create-pr opens one PR. + with tempfile.TemporaryDirectory() as tmpdir: + model.save_pretrained(tmpdir) + if tokenizer is not None: + tokenizer.save_pretrained(tmpdir) + if generation_config is not None: + generation_config.save_pretrained(tmpdir) + model_card.save(os.path.join(tmpdir, "README.md")) + + operations = [ + CommitOperationAdd(path_in_repo=os.path.relpath(os.path.join(root, name), tmpdir), + path_or_fileobj=os.path.join(root, name)) + for root, _, files in os.walk(tmpdir) + for name in files + ] + api.create_commit( + repo_id=repo_id, + operations=operations, + commit_message=f"Upload tiny {model.__class__.__name__}", + create_pr=exists and create_pr, + ) + + +def init_weights_tiny_model(model): + """ + Initialize tiny test models to avoid NaNs from uninitialized weights. 
+ + Uses safe defaults: + - Linear/Conv1d: Xavier uniform (weights), zero (biases) + - Embedding: Normal(0, 0.02) + - LayerNorm: Ones (weights), zero (biases) + """ + for module in model.modules(): + if isinstance(module, nn.Linear): + if module.bias is not None: + nn.init.zeros_(module.bias) + nn.init.xavier_uniform_(module.weight) + + elif isinstance(module, nn.Embedding): + nn.init.normal_(module.weight, mean=0.0, std=0.02) + + elif isinstance(module, nn.LayerNorm): + nn.init.ones_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + + elif isinstance(module, nn.Conv1d): + if module.bias is not None: + nn.init.zeros_(module.bias) + nn.init.xavier_uniform_(module.weight) diff --git a/scripts/generate_tiny_models/for_causal_lm/__init__.py b/scripts/generate_tiny_models/for_causal_lm/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py new file mode 100644 index 00000000000..fe1d72eae89 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "CohereLabs/tiny-aya-earth" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Cohere2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Cohere2ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py new file mode 100644 index 00000000000..48336e33ecc --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "CohereLabs/aya-expanse-8b" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = CohereConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = CohereForCausalLM(config).to(dtype=torch.float16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py new file mode 100644 index 00000000000..0c29cffbae1 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
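+
+# Note: the R1-0528 variant is generated by the sibling script deepseek_v3_for_causal_lm_0528.py, kept
+# separate because its chat template differs from R1's.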
+ +import torch +from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "deepseek-ai/DeepSeek-R1" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = DeepseekV3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py new file mode 100644 index 00000000000..a29bd6cec0e --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py @@ -0,0 +1,42 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: R1-0528 is kept in addition to R1 because it has a different chat template. + +import torch +from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "deepseek-ai/DeepSeek-R1-0528" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = DeepseekV3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "0528") diff --git a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py new file mode 100644 index 00000000000..502bdd35831 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "tiiuae/falcon-7b-instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = FalconMambaConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = FalconMambaForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py new file mode 100644 index 00000000000..3d96bd09a9a --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/gemma-2-2b-it" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Gemma2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Gemma2ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py new file mode 100644 index 00000000000..b391e48473a --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/gemma-7b-it" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = GemmaConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = GemmaForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py new file mode 100644 index 00000000000..c07297fd00e --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py @@ -0,0 +1,42 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
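+
+# GLM-4.5 names its expert count n_routed_experts (Qwen3-MoE uses num_experts, GPT-OSS num_local_experts);
+# both MoE knobs below are scaled down (4 experts, 2 active per token) to keep the checkpoint tiny.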
+ +import torch +from transformers import AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "zai-org/GLM-4.5" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Glm4MoeConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, + n_routed_experts=4, + num_experts_per_tok=2, +) +model = Glm4MoeForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py new file mode 100644 index 00000000000..758841b0b33 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "openai-community/gpt2" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = GPT2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = GPT2LMHeadModel(config).to(dtype=torch.float32) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py new file mode 100644 index 00000000000..f2eed955e9f --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "EleutherAI/pythia-14m" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = GPTNeoXConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = GPTNeoXForCausalLM(config).to(dtype=torch.float16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py new file mode 100644 index 00000000000..88b100da305 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py @@ -0,0 +1,42 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
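+
+# GPT-OSS exposes its expert count as num_local_experts; 4 experts with 2 active per token keeps the MoE
+# routing path exercised while the checkpoint stays tiny.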
+ +import torch +from transformers import AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "openai/gpt-oss-20b" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = GptOssConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, + num_local_experts=4, + num_experts_per_tok=2, +) +model = GptOssForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py new file mode 100644 index 00000000000..41140f1b431 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = LlamaConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = LlamaForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3") diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py new file mode 100644 index 00000000000..8ddf1a3a5cb --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = LlamaConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = LlamaForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3.1") diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py new file mode 100644 index 00000000000..d6396fdc11e --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = LlamaConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = LlamaForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3.2") diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py new file mode 100644 index 00000000000..33ed69fd2c4 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.1" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = MistralConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = MistralForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "0.1") diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py new file mode 100644 index 00000000000..1463c60c9ab --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = MistralConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = MistralForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "0.2") diff --git a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py new file mode 100644 index 00000000000..dd34ffeb4ca --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "facebook/opt-1.3b" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = OPTConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = OPTForCausalLM(config).to(dtype=torch.float16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py new file mode 100644 index 00000000000..c1e4b8c261e --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py @@ -0,0 +1,29 @@ +# Copyright 2020-2026 The HuggingFace Team. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from peft import LoraConfig, get_peft_model +from transformers import GenerationConfig, Qwen3ForCausalLM + +from .._common import check_transformers_version, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM" + +model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto") +model = get_peft_model(model, LoraConfig()) +generation_config = GenerationConfig.from_pretrained(BASE) +smoke_test(model, None) +push_to_hub(model, None, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py new file mode 100644 index 00000000000..1d0da0a62c3 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py @@ -0,0 +1,31 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Same model class as peft_qwen3_for_causal_lm.py, with different (random) LoRA weights. + +from peft import LoraConfig, get_peft_model +from transformers import GenerationConfig, Qwen3ForCausalLM + +from .._common import check_transformers_version, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +BASE = "trl-internal-testing/tiny-Qwen3ForCausalLM" + +model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto") +model = get_peft_model(model, LoraConfig()) +generation_config = GenerationConfig.from_pretrained(BASE) +smoke_test(model, None) +push_to_hub(model, None, generation_config, "tiny", "2") diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py new file mode 100644 index 00000000000..3dbe53eb51b --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "microsoft/Phi-3-mini-4k-instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Phi3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Phi3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3") diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py new file mode 100644 index 00000000000..9685f638b9e --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "microsoft/Phi-3.5-mini-instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Phi3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Phi3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "3.5") diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py new file mode 100644 index 00000000000..81ca31f7957 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-32B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "2.5") diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py new file mode 100644 index 00000000000..18af3689ef5 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py @@ -0,0 +1,40 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "2.5-Coder") diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py new file mode 100644 index 00000000000..cea498065e7 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py @@ -0,0 +1,42 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-8B" +# Revision pins the chat template PR with `{% generation %}` support. 
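+# (`{% generation %}` wraps the assistant turns in the chat template so that
+# `apply_chat_template(..., return_assistant_tokens_mask=True)` can identify completion tokens.)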
+REVISION = "refs/pr/14" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION) +generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=REVISION) +config = Qwen3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py new file mode 100644 index 00000000000..96f7e74059f --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py @@ -0,0 +1,42 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-30B-A3B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen3MoeConfig( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, + num_experts=4, + num_experts_per_tok=2, +) +model = Qwen3MoeForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py new file mode 100644 index 00000000000..a19f39a4cf4 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py @@ -0,0 +1,41 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-32B-Instruct" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen2Config( + vocab_size=len(tokenizer.vocab), + hidden_size=128, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen2ForCausalLM(config).to(dtype=torch.bfloat16) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "small", "2.5") diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py new file mode 100644 index 00000000000..2af5ba70df3 --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py @@ -0,0 +1,41 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. + +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-4B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=128, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "small") diff --git a/scripts/generate_tiny_models/for_conditional_generation/__init__.py b/scripts/generate_tiny_models/for_conditional_generation/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/generate_tiny_models/for_conditional_generation/bart_model.py b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py new file mode 100644 index 00000000000..aa180d5e119 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py @@ -0,0 +1,32 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, BartModel + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "facebook/bart-base" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained(MODEL_ID) +config.d_model = 24 +model = BartModel(config).to(dtype=torch.float32) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, None, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py new file mode 100644 index 00000000000..7c928061f61 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# python -m scripts.generate_tiny_models.for_conditional_generation.gemma3_for_conditional_generation + +import torch +from transformers import AutoConfig, AutoProcessor, Gemma3ForConditionalGeneration, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/gemma-3-4b-it" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, # Set it automatically from num_hidden_layers +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Gemma3ForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py new file mode 100644 index 00000000000..23d1ddbeed0 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py @@ -0,0 +1,60 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Gemma4 rope validation fails when passing text_config as a dict through AutoConfig, +# so the config is loaded first and then mutated in place. 
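+# Sketch of the two construction paths (the first fails, per the note above; the exact failure
+# point inside rope validation is not shown here):
+#   AutoConfig.from_pretrained(MODEL_ID, text_config={"num_hidden_layers": 2, ...})  # raises
+#   config = AutoConfig.from_pretrained(MODEL_ID)  # works: load the full-size config first,
+#   config.text_config.num_hidden_layers = 2       # then shrink it in place (as done below)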
+ +import torch +from transformers import AutoConfig, AutoProcessor, Gemma4ForConditionalGeneration, GenerationConfig + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "5.6.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/gemma-4-E2B-it" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID) +for k, v in text_config.items(): + setattr(config.text_config, k, v) +for k, v in vision_config.items(): + setattr(config.vision_config, k, v) +config.text_config.layer_types = ["sliding_attention", "full_attention"] +config.text_config.num_kv_shared_layers = 0 +config.text_config.global_head_dim = 8 +config.text_config.hidden_size_per_layer_input = 16 +config.audio_config = None + +model = Gemma4ForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py new file mode 100644 index 00000000000..74b61fac6bc --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py @@ -0,0 +1,53 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics2ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "HuggingFaceM4/idefics2-8b" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained( + MODEL_ID, + text_config=text_config, + vision_config=vision_config, + perceiver_config={"hidden_size": 16}, +) +model = Idefics2ForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py new file mode 100644 index 00000000000..fd6e71c3cc0 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Idefics3ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "HuggingFaceM4/Idefics3-8B-Llama3" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Idefics3ForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py new file mode 100644 index 00000000000..de9ef6b6448 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, InternVLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "OpenGVLab/InternVL3-8B-hf" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = InternVLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py new file mode 100644 index 00000000000..cbc404ed11e --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "llava-hf/llava-1.5-7b-hf" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = LlavaForConditionalGeneration(config).to(dtype=torch.float16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py new file mode 100644 index 00000000000..f4505b885bb --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py @@ -0,0 +1,54 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Original model dtype is float16, but it triggers CUDA device-side assert on generation (see GH-4741), +# so this tiny model is saved in bfloat16. +# Upstream hotfix: llava-hf/llava-v1.6-mistral-7b-hf mistakenly sets text_config.dtype to "bfloat16" +# (see https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf/discussions/46), which we clear here. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, LlavaNextForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "dtype": None, # hotfix for upstream text_config.dtype = "bfloat16" +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = LlavaNextForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py new file mode 100644 index 00000000000..9d2f528f033 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, PaliGemmaForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/paligemma-3b-pt-224" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = PaliGemmaForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py new file mode 100644 index 00000000000..0f6fa35ad80 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py @@ -0,0 +1,62 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: Qwen2.5-VL requires out_hidden_size on the vision config, plus root-level num_hidden_layers/hidden_size/ +# num_attention_heads (distinct from the text_config fields). See GH-4101 and transformers#41020. 
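+# (Why both levels are shrunk below: the vision merger consumes vision_config.out_hidden_size,
+# and -- as far as the linked issues suggest -- some code paths read num_hidden_layers /
+# hidden_size / num_attention_heads from the root config rather than from text_config. This
+# rationale is an inference from GH-4101 and transformers#41020, not documented behavior.)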
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2_5_VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained( + MODEL_ID, + text_config=text_config, + vision_config=vision_config, + rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, + num_hidden_layers=2, + hidden_size=16, + num_attention_heads=4, +) +model = Qwen2_5_VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py new file mode 100644 index 00000000000..c524ddaed18 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py @@ -0,0 +1,57 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Note: two distinct rope_scaling dict objects (root and text_config). See GH-4101 and transformers#41020. 
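+# Sketch of the pitfall avoided below (assumption: config validation mutates rope_scaling in place):
+#   shared = {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}
+#   AutoConfig.from_pretrained(MODEL_ID, rope_scaling=shared, text_config={"rope_scaling": shared})
+# With a single shared dict, the second validation pass would see the first pass's mutations,
+# so two separate dict literals are passed instead.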
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen2VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, + "rope_scaling": {"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, +} + +config = AutoConfig.from_pretrained( + MODEL_ID, + text_config=text_config, + vision_config=vision_config, + rope_scaling={"type": "default", "mrope_section": [1, 1], "rope_type": "default"}, +) +model = Qwen2VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py new file mode 100644 index 00000000000..54bf25d2882 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py @@ -0,0 +1,62 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - Qwen3.5 auto-builds layer_types from num_hidden_layers with default interval 4, so tiny models +# (2 layers) end up all-linear-attention, which breaks dynamic cache. Force one full-attention layer. +# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`). +# - Qwen3.5 has no published generation_config on the Hub yet. +# - Qwen3.5 keeps some linear-attn weights in float32; we cast them back after the bfloat16 conversion. 
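+# Worked example for the first note (a sketch of the default construction, not the upstream code):
+#   ["full_attention" if (i + 1) % 4 == 0 else "linear_attention" for i in range(2)]
+#   -> ["linear_attention", "linear_attention"]  # no full-attention layer, so dynamic cache breaks
+# Hence layer_types is set explicitly below, with full_attention_interval=2 kept consistent with it.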
+ +import torch +from transformers import AutoConfig, AutoProcessor, Qwen3_5ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "5.2.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3.5-0.8B" + +processor = AutoProcessor.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": ["linear_attention", "full_attention"], + "full_attention_interval": 2, +} +vision_config = { + "hidden_size": 16, + "depth": 2, + "num_heads": 4, + "intermediate_size": 32, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3_5ForConditionalGeneration(config).to(dtype=torch.bfloat16) + +# Restore float32 for linear-attn weights that the upstream model keeps in fp32. +for layer in model.model.language_model.layers: + if hasattr(layer, "linear_attn"): + layer.linear_attn.A_log.data = layer.linear_attn.A_log.data.float() + layer.linear_attn.norm.weight.data = layer.linear_attn.norm.weight.data.float() + +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, None, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py new file mode 100644 index 00000000000..794d771ef38 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py @@ -0,0 +1,56 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - "layer_types" is intentionally omitted from text_config: qwen3_vl's modeling code checks +# `hasattr(config, "layer_types")` and uses a different path when absent +# (see transformers/models/qwen3_vl/modeling_qwen3_vl.py). +# - mrope_section needs 3 elements (for dim, offset in enumerate((1, 2), start=1): mrope_section[dim]). 
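+# Worked through: `for dim, offset in enumerate((1, 2), start=1)` indexes mrope_section[1] and
+# mrope_section[2], so a 2-element list raises IndexError; a 3-element list such as [2, 2, 2]
+# (used below) does not.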
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3VLForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.57.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-VL-2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "rope_scaling": {"mrope_interleaved": True, "mrope_section": [2, 2, 2], "rope_type": "default"}, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, + "depth": 2, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3VLForConditionalGeneration(config).to(dtype=torch.bfloat16) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py new file mode 100644 index 00000000000..88433f4f971 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py @@ -0,0 +1,48 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, SmolVLMForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "HuggingFaceTB/SmolVLM2-2.2B-Instruct" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": None, +} +vision_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = SmolVLMForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py new file mode 100644 index 00000000000..4f19ef21f66 --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py @@ -0,0 +1,33 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, T5ForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "google/flan-t5-small" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained(MODEL_ID) +config.d_model = 24 +model = T5ForConditionalGeneration(config).to(dtype=torch.float32) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_sequence_classification/__init__.py b/scripts/generate_tiny_models/for_sequence_classification/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py new file mode 100644 index 00000000000..ede81cdf693 --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py @@ -0,0 +1,41 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXForSequenceClassification + +from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "EleutherAI/pythia-14m" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = GPTNeoXForSequenceClassification(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py new file mode 100644 index 00000000000..4dede5dff74 --- /dev/null +++ b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py @@ -0,0 +1,41 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
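+
+# Note: num_labels=1 below yields a single-logit classification head, i.e. one scalar score per
+# sequence; the assumption is that downstream reward-model style tests consume that shape (the
+# script itself does not check this).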
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaForSequenceClassification
+
+from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test
+
+TRANSFORMERS_VERSION = "4.56.2"
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    num_labels=1,
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+model = LlamaForSequenceClassification(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "3.2")
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py
new file mode 100644
index 00000000000..33ebaf58ef1
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py
@@ -0,0 +1,43 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2ForSequenceClassification
+
+from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test
+
+TRANSFORMERS_VERSION = "4.56.2"
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen2.5-32B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    num_labels=1,
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+# Work around a transformers bug: layer_types is built from the original num_hidden_layers, ignoring the override above
+config.layer_types = config.layer_types[:2]
+model = Qwen2ForSequenceClassification(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny", "2.5")
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py
new file mode 100644
index 00000000000..e48a7296950
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py
@@ -0,0 +1,43 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3ForSequenceClassification
+
+from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test
+
+TRANSFORMERS_VERSION = "4.56.2"
+check_transformers_version(TRANSFORMERS_VERSION)
+
+MODEL_ID = "Qwen/Qwen3-4B"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+generation_config = GenerationConfig.from_pretrained(MODEL_ID)
+config = AutoConfig.from_pretrained(
+    MODEL_ID,
+    num_labels=1,
+    hidden_size=16,
+    num_attention_heads=4,
+    num_key_value_heads=2,
+    num_hidden_layers=2,
+    intermediate_size=32,
+)
+# Work around a transformers bug: layer_types is built from the original num_hidden_layers, ignoring the override above
+config.layer_types = config.layer_types[:2]
+model = Qwen3ForSequenceClassification(config).to(dtype=torch.bfloat16)
+init_weights_tiny_model(model)
+smoke_test(model, tokenizer)
+check_dtype_pattern(MODEL_ID, model)
+print_config_diff(MODEL_ID, model)
+push_to_hub(model, tokenizer, generation_config, "tiny")
diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py
new file mode 100644
index 00000000000..c6f829dc8c7
--- /dev/null
+++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py
@@ -0,0 +1,42 @@
+# Copyright 2020-2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
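+
+# Sizing note (an inference, not stated in the source): num_experts=4 with num_experts_per_tok=2
+# below is presumably the smallest setup that still exercises top-k expert routing (k < num_experts).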
+ +import torch +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeForSequenceClassification + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-30B-A3B" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = AutoConfig.from_pretrained( + MODEL_ID, + num_labels=1, + hidden_size=16, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, + num_experts=4, + num_experts_per_tok=2, +) +model = Qwen3MoeForSequenceClassification(config).to(dtype=torch.bfloat16) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny") diff --git a/tests/conftest.py b/tests/conftest.py index f071b789ffd..4008a4d9d0b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ MODEL_REVISIONS = { # Add model_id: revision mappings here to test PRs + "trl-internal-testing/tiny-Gemma3ForConditionalGeneration": "refs/pr/7", } From a060e6df799484827b5c0c6bfd463677564ca815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 18:51:07 +0000 Subject: [PATCH 02/20] cohere and fix vocab size --- .../for_causal_lm/cohere2_for_causal_lm.py | 4 ++-- .../for_causal_lm/cohere_for_causal_lm.py | 5 +++-- .../for_causal_lm/deepseek_v3_for_causal_lm.py | 4 ++-- .../for_causal_lm/deepseek_v3_for_causal_lm_0528.py | 4 ++-- .../for_causal_lm/falcon_mamba_for_causal_lm.py | 4 ++-- .../for_causal_lm/gemma2_for_causal_lm.py | 4 ++-- .../for_causal_lm/gemma_for_causal_lm.py | 4 ++-- .../for_causal_lm/glm4_moe_for_causal_lm.py | 4 ++-- .../generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py | 4 ++-- .../for_causal_lm/gpt_neox_for_causal_lm.py | 4 ++-- .../for_causal_lm/gpt_oss_for_causal_lm.py | 4 ++-- .../for_causal_lm/llama_for_causal_lm_3.py | 4 ++-- .../for_causal_lm/llama_for_causal_lm_3_1.py | 4 ++-- .../for_causal_lm/llama_for_causal_lm_3_2.py | 4 ++-- .../for_causal_lm/mistral_for_causal_lm_0_1.py | 4 ++-- .../for_causal_lm/mistral_for_causal_lm_0_2.py | 4 ++-- .../generate_tiny_models/for_causal_lm/opt_for_causal_lm.py | 4 ++-- .../for_causal_lm/phi3_for_causal_lm_3.py | 4 ++-- .../for_causal_lm/phi3_for_causal_lm_3_5.py | 4 ++-- .../for_causal_lm/qwen2_for_causal_lm_2_5.py | 4 ++-- .../for_causal_lm/qwen2_for_causal_lm_2_5_coder.py | 4 ++-- .../for_causal_lm/qwen3_for_causal_lm.py | 4 ++-- .../for_causal_lm/qwen3_moe_for_causal_lm.py | 4 ++-- .../for_causal_lm/small_qwen2_for_causal_lm_2_5.py | 4 ++-- .../for_causal_lm/small_qwen3_for_causal_lm.py | 4 ++-- .../gemma3_for_conditional_generation.py | 2 -- tests/conftest.py | 1 + 27 files changed, 52 insertions(+), 52 deletions(-) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py index fe1d72eae89..82e11dc84a8 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Cohere2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py index 48336e33ecc..c083d083ba5 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,12 +25,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = CohereConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, num_hidden_layers=2, intermediate_size=32, + logit_scale=0.125, ) model = CohereForCausalLM(config).to(dtype=torch.float16) init_weights_tiny_model(model) diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py index 0c29cffbae1..fa4274fb70a 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = DeepseekV3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py index a29bd6cec0e..bd852cfe16d 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py @@ -15,7 +15,7 @@ # Note: R1-0528 is kept in addition to R1 because it has a different chat template. 
import torch -from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -27,7 +27,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = DeepseekV3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py index 502bdd35831..a588f131bd2 100644 --- a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = FalconMambaConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py index 3d96bd09a9a..4e5f41aff9f 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Gemma2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py index b391e48473a..2c3944eea67 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GemmaConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py index c07297fd00e..658c0ff9bf9 100644 --- a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Glm4MoeConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py index 758841b0b33..44fb9dbad35 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel +from transformers import AutoConfig, AutoTokenizer, GPT2Config, GPT2LMHeadModel, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GPT2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py index f2eed955e9f..608f377669b 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM +from transformers import AutoConfig, AutoTokenizer, GPTNeoXConfig, GPTNeoXForCausalLM, GenerationConfig from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GPTNeoXConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py index 88b100da305..599c14e7f56 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GptOssConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py index 41140f1b431..e24e0958804 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py index 8ddf1a3a5cb..071c72b7cc0 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py index d6396fdc11e..7691e69d318 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py index 33ed69fd2c4..1c7bf17a370 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = MistralConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py index 1463c60c9ab..638b86df193 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = MistralConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py index dd34ffeb4ca..a8b64038f28 100644 --- a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = OPTConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py index 3dbe53eb51b..982e802e09f 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Phi3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py index 9685f638b9e..23e367b8040 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Phi3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py index 81ca31f7957..84f168180ce 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py index 18af3689ef5..6ada52fea5d 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py index cea498065e7..ebb45674ac2 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -27,7 +27,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION) generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=REVISION) config = Qwen3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py index 96f7e74059f..3e42172015f 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test @@ -25,7 +25,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen3MoeConfig( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py index a19f39a4cf4..225c4be0587 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py @@ -15,7 +15,7 @@ # Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test @@ -27,7 +27,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=128, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py index 2af5ba70df3..599c857d64b 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py @@ -15,7 +15,7 @@ # Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. 
import torch -from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test @@ -27,7 +27,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen3Config( - vocab_size=len(tokenizer.vocab), + vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, hidden_size=128, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py index 7c928061f61..050865d27ff 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# python -m scripts.generate_tiny_models.for_conditional_generation.gemma3_for_conditional_generation - import torch from transformers import AutoConfig, AutoProcessor, Gemma3ForConditionalGeneration, GenerationConfig diff --git a/tests/conftest.py b/tests/conftest.py index 4008a4d9d0b..921ae910cda 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ MODEL_REVISIONS = { # Add model_id: revision mappings here to test PRs + "trl-internal-testing/tiny-CohereForCausalLM": "refs/pr/1", "trl-internal-testing/tiny-Gemma3ForConditionalGeneration": "refs/pr/7", } From 158b891a198f667b9396838da7c76bbf285b19e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 18:51:14 +0000 Subject: [PATCH 03/20] print pr --- scripts/generate_tiny_models/_common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 1f713853f59..96ef12b617b 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -216,12 +216,14 @@ def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, f for root, _, files in os.walk(tmpdir) for name in files ] - api.create_commit( + commit_info = api.create_commit( repo_id=repo_id, operations=operations, commit_message=f"Upload tiny {model.__class__.__name__}", create_pr=exists and create_pr, ) + if commit_info.pr_url: + print(f"[push_to_hub] PR opened: {commit_info.pr_url}") def init_weights_tiny_model(model): From f5eedfb6815e1a2655b0963426a348a125b07e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 18:55:13 +0000 Subject: [PATCH 04/20] precommit --- scripts/generate_tiny_models/__init__.py | 14 ++++++++++++++ .../generate_tiny_models/for_causal_lm/__init__.py | 14 ++++++++++++++ .../for_conditional_generation/__init__.py | 14 ++++++++++++++ .../for_sequence_classification/__init__.py | 14 ++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/scripts/generate_tiny_models/__init__.py b/scripts/generate_tiny_models/__init__.py index e69de29bb2d..3d26f4482fe 100644 --- a/scripts/generate_tiny_models/__init__.py +++ b/scripts/generate_tiny_models/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scripts/generate_tiny_models/for_causal_lm/__init__.py b/scripts/generate_tiny_models/for_causal_lm/__init__.py index e69de29bb2d..3d26f4482fe 100644 --- a/scripts/generate_tiny_models/for_causal_lm/__init__.py +++ b/scripts/generate_tiny_models/for_causal_lm/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scripts/generate_tiny_models/for_conditional_generation/__init__.py b/scripts/generate_tiny_models/for_conditional_generation/__init__.py index e69de29bb2d..3d26f4482fe 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/__init__.py +++ b/scripts/generate_tiny_models/for_conditional_generation/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/scripts/generate_tiny_models/for_sequence_classification/__init__.py b/scripts/generate_tiny_models/for_sequence_classification/__init__.py index e69de29bb2d..3d26f4482fe 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/__init__.py +++ b/scripts/generate_tiny_models/for_sequence_classification/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ From ffbf3b1730e4a742b4f3f4791e3a0507dca21ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 18:58:47 +0000 Subject: [PATCH 05/20] precommit --- scripts/generate_tiny_models/_common.py | 15 +++++++++++---- .../for_causal_lm/cohere2_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/cohere_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/deepseek_v3_for_causal_lm.py | 10 +++++++++- .../deepseek_v3_for_causal_lm_0528.py | 10 +++++++++- .../for_causal_lm/falcon_mamba_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/gemma2_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/gemma_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/glm4_moe_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/gpt2_lm_head_model.py | 12 ++++++++++-- .../for_causal_lm/gpt_neox_for_causal_lm.py | 12 ++++++++++-- .../for_causal_lm/gpt_oss_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/llama_for_causal_lm_3.py | 10 +++++++++- .../for_causal_lm/llama_for_causal_lm_3_1.py | 10 +++++++++- .../for_causal_lm/llama_for_causal_lm_3_2.py | 10 +++++++++- .../for_causal_lm/mistral_for_causal_lm_0_1.py | 10 +++++++++- .../for_causal_lm/mistral_for_causal_lm_0_2.py | 10 +++++++++- .../for_causal_lm/opt_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/peft_qwen3_for_causal_lm.py | 1 + .../for_causal_lm/peft_qwen3_for_causal_lm_2.py | 1 + .../for_causal_lm/phi3_for_causal_lm_3.py | 10 +++++++++- .../for_causal_lm/phi3_for_causal_lm_3_5.py | 10 +++++++++- .../for_causal_lm/qwen2_for_causal_lm_2_5.py | 10 +++++++++- .../qwen2_for_causal_lm_2_5_coder.py | 10 +++++++++- .../for_causal_lm/qwen3_for_causal_lm.py | 10 +++++++++- .../for_causal_lm/qwen3_moe_for_causal_lm.py | 10 +++++++++- .../small_qwen2_for_causal_lm_2_5.py | 1 + .../for_causal_lm/small_qwen3_for_causal_lm.py | 1 + .../for_conditional_generation/bart_model.py | 1 + .../gemma3_for_conditional_generation.py | 1 + .../gemma4_for_conditional_generation.py | 1 + .../idefics2_for_conditional_generation.py | 1 + .../idefics3_for_conditional_generation.py | 1 + .../internvl_for_conditional_generation.py | 1 + .../llava_for_conditional_generation.py | 1 + .../llava_next_for_conditional_generation.py | 1 + .../paligemma_for_conditional_generation.py | 1 + .../qwen2_5_vl_for_conditional_generation.py | 1 + .../qwen2_vl_for_conditional_generation.py | 1 + .../qwen3_5_for_conditional_generation.py | 1 + .../qwen3_vl_for_conditional_generation.py | 1 + .../smolvlm_for_conditional_generation.py | 1 + .../t5_for_conditional_generation.py | 1 + .../gpt_neox_for_sequence_classification.py | 10 +++++++++- .../llama_for_sequence_classification_3_2.py | 10 +++++++++- .../qwen2_for_sequence_classification_2_5.py | 10 +++++++++- .../qwen3_for_sequence_classification.py | 10 +++++++++- .../qwen3_moe_for_sequence_classification.py | 1 + 48 files changed, 276 insertions(+), 33 deletions(-) diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 96ef12b617b..900a8ebe6f4 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -50,7 +50,7 @@ def check_transformers_version(expected_version): if Version(transformers.__version__) != Version(expected_version): raise RuntimeError( - f"This script requires transformers=={expected_version}, " f"but {transformers.__version__} is installed." + f"This script requires transformers=={expected_version}, but {transformers.__version__} is installed." 
) @@ -67,7 +67,12 @@ def smoke_test(model, tokenizer_or_processor=None): red = Image.new("RGB", (24, 24), color="red") blue = Image.new("RGB", (24, 24), color="blue") messages = [ - [{"role": "user", "content": [{"type": "image", "image": red}, {"type": "text", "text": "What is this?"}]}], + [ + { + "role": "user", + "content": [{"type": "image", "image": red}, {"type": "text", "text": "What is this?"}], + } + ], [{"role": "user", "content": [{"type": "text", "text": "Is it blue?"}, {"type": "image", "image": blue}]}], ] inputs = processor.apply_chat_template( @@ -211,8 +216,10 @@ def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, f model_card.save(os.path.join(tmpdir, "README.md")) operations = [ - CommitOperationAdd(path_in_repo=os.path.relpath(os.path.join(root, name), tmpdir), - path_or_fileobj=os.path.join(root, name)) + CommitOperationAdd( + path_in_repo=os.path.relpath(os.path.join(root, name), tmpdir), + path_or_fileobj=os.path.join(root, name), + ) for root, _, files in os.walk(tmpdir) for name in files ] diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py index 82e11dc84a8..f1508fe8b57 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py index c083d083ba5..6731ff52838 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py index fa4274fb70a..d480a516606 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + 
TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py index bd852cfe16d..9fdb50c2d90 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py @@ -17,7 +17,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py index a588f131bd2..f93f1653d5a 100644 --- a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py index 4e5f41aff9f..ce38bcc8ae1 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py index 2c3944eea67..bdd85a572cc 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff 
--git a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py index 658c0ff9bf9..c96fbc1b89e 100644 --- a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py index 44fb9dbad35..a91c36aae73 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py @@ -13,9 +13,17 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GPT2Config, GPT2LMHeadModel, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py index 608f377669b..18bc7d12956 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py @@ -13,9 +13,17 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GPTNeoXConfig, GPTNeoXForCausalLM, GenerationConfig +from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py index 599c14e7f56..270adc826e5 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py index e24e0958804..f3808e8d992 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py index 071c72b7cc0..ed9e607c9f7 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py index 7691e69d318..e4285e892b3 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py @@ -15,7 +15,15 @@ import torch from 
transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py index 1c7bf17a370..061c5d27cfa 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py index 638b86df193..6c477fd0922 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py index a8b64038f28..817223bad05 100644 --- a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py index c1e4b8c261e..7f647facaf8 100644 --- a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py @@ -17,6 +17,7 @@ from .._common import check_transformers_version, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" 
check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py index 1d0da0a62c3..08fb10ce0a9 100644 --- a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py @@ -19,6 +19,7 @@ from .._common import check_transformers_version, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py index 982e802e09f..87ab1a30db2 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py index 23e367b8040..0b6ce00bbdc 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py index 84f168180ce..0198bb05ed8 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py index 6ada52fea5d..908fc0692b1 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py @@ -15,7 +15,15 @@ import torch from transformers 
import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py index ebb45674ac2..84ff6176136 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py index 3e42172015f..0f24e3175c6 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py index 225c4be0587..5556958abce 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py @@ -19,6 +19,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py index 599c857d64b..2b3840ff535 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py @@ -19,6 +19,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/bart_model.py b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py index aa180d5e119..c754515af72 100644 --- 
a/scripts/generate_tiny_models/for_conditional_generation/bart_model.py +++ b/scripts/generate_tiny_models/for_conditional_generation/bart_model.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py index 050865d27ff..1cc3f2f361d 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py index 23d1ddbeed0..8d3cba21904 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma4_for_conditional_generation.py @@ -20,6 +20,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "5.6.0" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py index 74b61fac6bc..4e6dd711aff 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/idefics2_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py index fd6e71c3cc0..a24909ab7d8 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/idefics3_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py index de9ef6b6448..76ec81ca4ea 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/internvl_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, 
smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py index cbc404ed11e..1a7ecf4b8cb 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/llava_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py index f4505b885bb..875fe0545da 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/llava_next_for_conditional_generation.py @@ -22,6 +22,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py index 9d2f528f033..5b5fa67ec9d 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/paligemma_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py index 0f6fa35ad80..2eab8826124 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_5_vl_for_conditional_generation.py @@ -20,6 +20,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py index c524ddaed18..b3fdf792418 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen2_vl_for_conditional_generation.py @@ -19,6 +19,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py 
b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py index 54bf25d2882..2caa177b742 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py @@ -24,6 +24,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "5.2.0" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py index 794d771ef38..f04fb4cfadf 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_vl_for_conditional_generation.py @@ -23,6 +23,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.57.0" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py index 88433f4f971..b019e6d084b 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/smolvlm_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py index 4f19ef21f66..451e7b5ecc9 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/t5_for_conditional_generation.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py index ede81cdf693..c0e270ad050 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py +++ b/scripts/generate_tiny_models/for_sequence_classification/gpt_neox_for_sequence_classification.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXForSequenceClassification -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git 
a/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py index 4dede5dff74..e082ed94656 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py +++ b/scripts/generate_tiny_models/for_sequence_classification/llama_for_sequence_classification_3_2.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaForSequenceClassification -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py index 33ebaf58ef1..6b83cf4204f 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen2_for_sequence_classification_2_5.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2ForSequenceClassification -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py index e48a7296950..fa05dcc1105 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_for_sequence_classification.py @@ -15,7 +15,15 @@ import torch from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3ForSequenceClassification -from .._common import check_dtype_pattern, check_transformers_version, init_weights_tiny_model, print_config_diff, push_to_hub, smoke_test +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + TRANSFORMERS_VERSION = "4.56.2" check_transformers_version(TRANSFORMERS_VERSION) diff --git a/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py index c6f829dc8c7..b89842afbaa 100644 --- a/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py +++ b/scripts/generate_tiny_models/for_sequence_classification/qwen3_moe_for_sequence_classification.py @@ -17,6 +17,7 @@ from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + TRANSFORMERS_VERSION = "4.56.2" 
check_transformers_version(TRANSFORMERS_VERSION) From d24a76c87001864fe13e1c3da17c1af82ae6f5f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 19:12:45 +0000 Subject: [PATCH 06/20] cohere2 --- .../for_causal_lm/cohere2_for_causal_lm.py | 15 +++++++++++++++ tests/conftest.py | 1 + 2 files changed, 16 insertions(+) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py index f1508fe8b57..0b3c779546a 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py @@ -39,6 +39,21 @@ num_key_value_heads=2, num_hidden_layers=2, intermediate_size=32, + bos_token_id=2, + eos_token_id=3, + logit_scale=1.0, + max_position_embeddings=500000, + rope_theta=50000, + cache_implementation="hybrid", + layer_switch=4, + order_of_interleaved_layers="local_attn_first", + position_embedding_type="rope_gptj", + rotary_pct=1.0, + use_embedding_sharing=True, + use_gated_activation=True, + use_parallel_block=True, + use_parallel_embedding=False, + use_qk_norm=False, ) model = Cohere2ForCausalLM(config).to(dtype=torch.bfloat16) init_weights_tiny_model(model) diff --git a/tests/conftest.py b/tests/conftest.py index 921ae910cda..402c14f635b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,7 @@ MODEL_REVISIONS = { # Add model_id: revision mappings here to test PRs "trl-internal-testing/tiny-CohereForCausalLM": "refs/pr/1", + "trl-internal-testing/tiny-Cohere2ForCausalLM": "refs/pr/1", "trl-internal-testing/tiny-Gemma3ForConditionalGeneration": "refs/pr/7", } From f0f5563e5ca61adc98a7eb5332c0f5c99c261334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 19:58:29 +0000 Subject: [PATCH 07/20] deepseek v3 --- scripts/generate_tiny_models/_common.py | 2 + .../deepseek_v3_for_causal_lm.py | 64 +++++++++++++++++-- tests/conftest.py | 1 + 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 900a8ebe6f4..914df2bd9e4 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -118,6 +118,8 @@ def _flatten(d, prefix=""): torch.float16: "F16", torch.bfloat16: "BF16", torch.float64: "F64", + torch.float8_e4m3fn: "F8_E4M3", + torch.float8_e5m2: "F8_E5M2", torch.int8: "I8", torch.int16: "I16", torch.int32: "I32", diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py index d480a516606..6e5b3e1a55a 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py @@ -12,8 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import tempfile + import torch -from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig +from transformers import ( + AutoConfig, + AutoTokenizer, + DeepseekV3Config, + DeepseekV3ForCausalLM, + FineGrainedFP8Config, + GenerationConfig, +) from .._common import ( check_dtype_pattern, @@ -32,16 +41,61 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +# DeepSeek-R1 uses weight_block_size=[128,128] upstream. 
We use [32,32] for the tiny model so that smaller +# hidden dims still tile cleanly (every projection dim divisible by 32, ≥ 2 blocks per dim to avoid +# a scalar weight_scale_inv shape). Trade-off: drops out of the DeepGEMM fast path onto Triton; fine +# for a tiny model used in tests. config = DeepseekV3Config( vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, - hidden_size=8, + hidden_size=64, num_attention_heads=4, num_key_value_heads=2, num_hidden_layers=2, - intermediate_size=32, + intermediate_size=64, + max_position_embeddings=163840, + rope_scaling={ + "beta_fast": 32.0, + "beta_slow": 1.0, + "factor": 40.0, + "mscale": 1.0, + "mscale_all_dim": 1.0, + "original_max_position_embeddings": 4096, + "rope_type": "yarn", + "type": "yarn", + }, + ep_size=1, + moe_layer_freq=1, + num_nextn_predict_layers=1, + scoring_func="sigmoid", + topk_method="noaux_tc", ) -model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16) -init_weights_tiny_model(model) + +# Build a random bf16 model, then round-trip through disk with `quantization_config=FineGrainedFP8Config(...)` +# so transformers' FP8 quantizer rewrites Linear layers to FP8Linear and writes FP8 weights + scales. +# Needs a GPU with compute capability >= 8.9 (Ada Lovelace or newer, e.g. RTX 4090, L4, H100); on older hardware the quantizer auto-dequantizes to bf16. +with tempfile.TemporaryDirectory() as tmpdir: + bf16_model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16, device="cuda") + init_weights_tiny_model(bf16_model) + bf16_model.save_pretrained(tmpdir) + tokenizer.save_pretrained(tmpdir) + del bf16_model + torch.cuda.empty_cache() + + quantization_config = FineGrainedFP8Config(activation_scheme="dynamic", weight_block_size=[32, 32]) + model = DeepseekV3ForCausalLM.from_pretrained( + tmpdir, + quantization_config=quantization_config, + dtype=torch.bfloat16, + device_map="cuda", + ) + +# `dtype=torch.bfloat16` casts the whole model, including the FP32 per-block scales the quantizer +# created. Restore them to FP32 to match the reference's dtype pattern.
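+# Hedged aside, illustration only (these lines would not ship with the tiny model):
+# the tiling arithmetic behind the weight_block_size=[32, 32] comment above, worked
+# for one assumed 64x64 projection. Only the block size is taken from this script;
+# the 64x64 shape is a stand-in for the hidden/intermediate-sized weights.
+_out_features, _in_features, _block = 64, 64, 32
+assert _out_features % _block == 0 and _in_features % _block == 0  # every dim tiles cleanly
+_scale_shape = (_out_features // _block, _in_features // _block)
+assert _scale_shape == (2, 2)  # >= 2 blocks per dim, so weight_scale_inv stays 2-D, never scalar
+# (The loop below performs the FP32 restore described in the comment above.)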
+for module in model.modules(): + if hasattr(module, "weight_scale_inv") and module.weight_scale_inv is not None: + module.weight_scale_inv.data = module.weight_scale_inv.data.float() + smoke_test(model, tokenizer) check_dtype_pattern(MODEL_ID, model) print_config_diff(MODEL_ID, model) diff --git a/tests/conftest.py b/tests/conftest.py index 402c14f635b..10a602d55d7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,6 +39,7 @@ # Add model_id: revision mappings here to test PRs "trl-internal-testing/tiny-CohereForCausalLM": "refs/pr/1", "trl-internal-testing/tiny-Cohere2ForCausalLM": "refs/pr/1", + "trl-internal-testing/tiny-DeepseekV3ForCausalLM": "refs/pr/2", "trl-internal-testing/tiny-Gemma3ForConditionalGeneration": "refs/pr/7", } From 59cb16e8724f523d1c86b137d54e2847faefc024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:21:00 +0000 Subject: [PATCH 08/20] revert to keep this focused --- .../for_causal_lm/cohere2_for_causal_lm.py | 19 +----- .../for_causal_lm/cohere_for_causal_lm.py | 4 +- .../deepseek_v3_for_causal_lm.py | 66 ++----------------- .../deepseek_v3_for_causal_lm_0528.py | 4 +- .../falcon_mamba_for_causal_lm.py | 4 +- .../for_causal_lm/gemma2_for_causal_lm.py | 4 +- .../for_causal_lm/gemma_for_causal_lm.py | 4 +- .../for_causal_lm/glm4_moe_for_causal_lm.py | 4 +- .../for_causal_lm/gpt2_lm_head_model.py | 4 +- .../for_causal_lm/gpt_neox_for_causal_lm.py | 4 +- .../for_causal_lm/gpt_oss_for_causal_lm.py | 4 +- .../for_causal_lm/llama_for_causal_lm_3.py | 4 +- .../for_causal_lm/llama_for_causal_lm_3_1.py | 4 +- .../for_causal_lm/llama_for_causal_lm_3_2.py | 4 +- .../mistral_for_causal_lm_0_1.py | 4 +- .../mistral_for_causal_lm_0_2.py | 4 +- .../for_causal_lm/opt_for_causal_lm.py | 4 +- .../for_causal_lm/phi3_for_causal_lm_3.py | 4 +- .../for_causal_lm/phi3_for_causal_lm_3_5.py | 4 +- .../for_causal_lm/qwen2_for_causal_lm_2_5.py | 4 +- .../qwen2_for_causal_lm_2_5_coder.py | 4 +- .../for_causal_lm/qwen3_for_causal_lm.py | 4 +- .../for_causal_lm/qwen3_moe_for_causal_lm.py | 4 +- .../small_qwen2_for_causal_lm_2_5.py | 4 +- .../small_qwen3_for_causal_lm.py | 4 +- .../gemma3_for_conditional_generation.py | 2 + tests/conftest.py | 4 -- 27 files changed, 56 insertions(+), 127 deletions(-) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py index 0b3c779546a..6632bbb13b0 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere2_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig +from transformers import AutoTokenizer, Cohere2Config, Cohere2ForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -33,27 +33,12 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Cohere2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, num_hidden_layers=2, intermediate_size=32, - bos_token_id=2, - eos_token_id=3, - logit_scale=1.0, - max_position_embeddings=500000, - rope_theta=50000, - cache_implementation="hybrid", - layer_switch=4, - order_of_interleaved_layers="local_attn_first", - position_embedding_type="rope_gptj", - rotary_pct=1.0, - use_embedding_sharing=True, - use_gated_activation=True, - use_parallel_block=True, - use_parallel_embedding=False, - use_qk_norm=False, ) model = Cohere2ForCausalLM(config).to(dtype=torch.bfloat16) init_weights_tiny_model(model) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py index 6731ff52838..e561ed1474f 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig +from transformers import AutoTokenizer, CohereConfig, CohereForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = CohereConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py index 6e5b3e1a55a..fe13290ecfa 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm.py @@ -12,17 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile - import torch -from transformers import ( - AutoConfig, - AutoTokenizer, - DeepseekV3Config, - DeepseekV3ForCausalLM, - FineGrainedFP8Config, - GenerationConfig, -) +from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -41,61 +32,16 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) - -# DeepSeek-R1 uses weight_block_size=[128,128] upstream. We use [32,32] for the tiny so that smaller -# hidden dims still tile cleanly (every projection dim divisible by 32, ≥ 2 blocks per dim to avoid -# a scalar weight_scale_inv shape). Trade-off: drops out of the DeepGEMM fast path onto Triton; fine -# for a tiny used in tests. 
config = DeepseekV3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, - hidden_size=64, + vocab_size=len(tokenizer.vocab), + hidden_size=8, num_attention_heads=4, num_key_value_heads=2, num_hidden_layers=2, - intermediate_size=64, - max_position_embeddings=163840, - rope_scaling={ - "beta_fast": 32.0, - "beta_slow": 1.0, - "factor": 40.0, - "mscale": 1.0, - "mscale_all_dim": 1.0, - "original_max_position_embeddings": 4096, - "rope_type": "yarn", - "type": "yarn", - }, - ep_size=1, - moe_layer_freq=1, - num_nextn_predict_layers=1, - scoring_func="sigmoid", - topk_method="noaux_tc", + intermediate_size=32, ) - -# Build a random bf16 model, then round-trip through disk with `quantization_config=FineGrainedFP8Config(...)` -# so transformers' FP8 quantizer rewrites Linear layers to FP8Linear and writes FP8 weights + scales. -# Needs a GPU with compute capability >= 8.9 (H100+); on older hardware the quantizer auto-dequantizes to bf16. -with tempfile.TemporaryDirectory() as tmpdir: - bf16_model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16, device="cuda") - init_weights_tiny_model(bf16_model) - bf16_model.save_pretrained(tmpdir) - tokenizer.save_pretrained(tmpdir) - del bf16_model - torch.cuda.empty_cache() - - quantization_config = FineGrainedFP8Config(activation_scheme="dynamic", weight_block_size=[32, 32]) - model = DeepseekV3ForCausalLM.from_pretrained( - tmpdir, - quantization_config=quantization_config, - dtype=torch.bfloat16, - device_map="cuda", - ) - -# `dtype=torch.bfloat16` casts the whole model, including the FP32 per-block scales the quantizer -# created. Restore them to FP32 to match the reference's dtype pattern. -for module in model.modules(): - if hasattr(module, "weight_scale_inv") and module.weight_scale_inv is not None: - module.weight_scale_inv.data = module.weight_scale_inv.data.float() - +model = DeepseekV3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) smoke_test(model, tokenizer) check_dtype_pattern(MODEL_ID, model) print_config_diff(MODEL_ID, model) diff --git a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py index 9fdb50c2d90..13db90d36f7 100644 --- a/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py +++ b/scripts/generate_tiny_models/for_causal_lm/deepseek_v3_for_causal_lm_0528.py @@ -15,7 +15,7 @@ # Note: R1-0528 is kept in addition to R1 because it has a different chat template. import torch -from transformers import AutoConfig, AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig +from transformers import AutoTokenizer, DeepseekV3Config, DeepseekV3ForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -35,7 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = DeepseekV3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py index f93f1653d5a..77133708ac1 100644 --- a/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/falcon_mamba_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig +from transformers import AutoTokenizer, FalconMambaConfig, FalconMambaForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = FalconMambaConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py index ce38bcc8ae1..68935533aac 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma2_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig +from transformers import AutoTokenizer, Gemma2Config, Gemma2ForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Gemma2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py index bdd85a572cc..22874adc2f9 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gemma_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig +from transformers import AutoTokenizer, GemmaConfig, GemmaForCausalLM, GenerationConfig from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GemmaConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py index c96fbc1b89e..b0721795295 100644 --- a/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/glm4_moe_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Glm4MoeConfig, Glm4MoeForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Glm4MoeConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py index a91c36aae73..8d1eb5103ea 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt2_lm_head_model.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel +from transformers import AutoTokenizer, GenerationConfig, GPT2Config, GPT2LMHeadModel from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GPT2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py index 18bc7d12956..080076f18c8 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_neox_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM +from transformers import AutoTokenizer, GenerationConfig, GPTNeoXConfig, GPTNeoXForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GPTNeoXConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py index 270adc826e5..6ae18272af7 100644 --- a/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/gpt_oss_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM +from transformers import AutoTokenizer, GenerationConfig, GptOssConfig, GptOssForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = GptOssConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py index f3808e8d992..1622c372870 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py index ed9e607c9f7..cb361901fcf 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_1.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py index e4285e892b3..34fda29b5f9 100644 --- a/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/llama_for_causal_lm_3_2.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM +from transformers import AutoTokenizer, GenerationConfig, LlamaConfig, LlamaForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = LlamaConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py index 061c5d27cfa..34615475bf5 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_1.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM +from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = MistralConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py index 6c477fd0922..aa4a9ce849a 100644 --- a/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/mistral_for_causal_lm_0_2.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM +from transformers import AutoTokenizer, GenerationConfig, MistralConfig, MistralForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = MistralConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py index 817223bad05..cf52a1c7c93 100644 --- a/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/opt_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM +from transformers import AutoTokenizer, GenerationConfig, OPTConfig, OPTForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = OPTConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py index 87ab1a30db2..edb13a7634b 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Phi3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py index 0b6ce00bbdc..d5816214c0b 100644 --- a/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/phi3_for_causal_lm_3_5.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Phi3Config, Phi3ForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Phi3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py index 0198bb05ed8..4b94615fcaf 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py index 908fc0692b1..b7cce4494ba 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen2_for_causal_lm_2_5_coder.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py index 84ff6176136..00afd997aad 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM from .._common import ( check_dtype_pattern, @@ -35,7 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION) generation_config = GenerationConfig.from_pretrained(MODEL_ID, revision=REVISION) config = Qwen3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py index 0f24e3175c6..ccf562f3c46 100644 --- a/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_moe_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen3MoeConfig, Qwen3MoeForCausalLM from .._common import ( check_dtype_pattern, @@ -33,7 +33,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen3MoeConfig( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=8, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py index 5556958abce..acb0ef4f465 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen2_for_causal_lm_2_5.py @@ -15,7 +15,7 @@ # Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen2Config, Qwen2ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test @@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen2Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=128, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py index 2b3840ff535..873fcb9641f 100644 --- a/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/small_qwen3_for_causal_lm.py @@ -15,7 +15,7 @@ # Slightly bigger than the "tiny" variant: vLLM requires hidden_size // num_attention_heads = 32. 
import torch -from transformers import AutoConfig, AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM +from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test @@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) generation_config = GenerationConfig.from_pretrained(MODEL_ID) config = Qwen3Config( - vocab_size=AutoConfig.from_pretrained(MODEL_ID).vocab_size, + vocab_size=len(tokenizer.vocab), hidden_size=128, num_attention_heads=4, num_key_value_heads=2, diff --git a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py index 1cc3f2f361d..2f85c68abe8 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/gemma3_for_conditional_generation.py @@ -37,6 +37,8 @@ "num_hidden_layers": 2, "hidden_size": 16, "num_attention_heads": 4, + "num_key_value_heads": 2, + "embed_dim": 64, } config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) diff --git a/tests/conftest.py b/tests/conftest.py index 10a602d55d7..f071b789ffd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,10 +37,6 @@ MODEL_REVISIONS = { # Add model_id: revision mappings here to test PRs - "trl-internal-testing/tiny-CohereForCausalLM": "refs/pr/1", - "trl-internal-testing/tiny-Cohere2ForCausalLM": "refs/pr/1", - "trl-internal-testing/tiny-DeepseekV3ForCausalLM": "refs/pr/2", - "trl-internal-testing/tiny-Gemma3ForConditionalGeneration": "refs/pr/7", } From 9bc6ad4f6234cc103a09c618a81c77c9ddf2c09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:26:52 +0000 Subject: [PATCH 09/20] nit --- scripts/generate_tiny_models/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_tiny_models/README.md b/scripts/generate_tiny_models/README.md index 14d96a793c7..35a2b8fa9fb 100644 --- a/scripts/generate_tiny_models/README.md +++ b/scripts/generate_tiny_models/README.md @@ -6,7 +6,7 @@ This directory contains one script per tiny model used by the TRL test suite. Ea ``` generate_tiny_models/ -├── _common.py # shared helpers (push_to_hub, smoke_test, print_config_diff, ...) +├── _common.py # shared helpers (push_to_hub, smoke_test, print_config_diff, ...) 
├── for_causal_lm/ # *ForCausalLM + GPT-2 LM head + small/PEFT variants ├── for_sequence_classification/ # *ForSequenceClassification (reward models) └── for_conditional_generation/ # *ForConditionalGeneration (VLMs + T5 + Bart encoder-decoder) From a7ad64a13f88c9229ec9818154df5a436d8371fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3%A9dec?= Date: Fri, 24 Apr 2026 20:28:15 +0000 Subject: [PATCH 10/20] revert --- .../generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py index e561ed1474f..daafb1ec187 100644 --- a/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/cohere_for_causal_lm.py @@ -39,7 +39,6 @@ num_key_value_heads=2, num_hidden_layers=2, intermediate_size=32, - logit_scale=0.125, ) model = CohereForCausalLM(config).to(dtype=torch.float16) init_weights_tiny_model(model) From 6b361e1c61d22fd302d8af27eac80705a7f2998c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:29:23 +0000 Subject: [PATCH 11/20] remove force and update readme --- scripts/generate_tiny_models/README.md | 13 ++++++++++--- scripts/generate_tiny_models/_common.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/generate_tiny_models/README.md b/scripts/generate_tiny_models/README.md index 35a2b8fa9fb..644039837ac 100644 --- a/scripts/generate_tiny_models/README.md +++ b/scripts/generate_tiny_models/README.md @@ -25,10 +25,17 @@ Each script: 1. Checks that the installed `transformers` version matches the one pinned in the script (fails otherwise). 2. Builds the tiny model with random weights. 3. Runs `smoke_test` — a minimal forward pass to catch config misspecification and NaNs. -4. Runs `print_config_diff` — prints every flat-key difference between the reference Hub config and the tiny model's config (for debugging scale-downs). -5. Pushes the model, tokenizer/processor, generation config, and model card to the Hub. +4. Runs `check_dtype_pattern` — reads the reference safetensors header via the Hub API and flags any tensor whose dtype diverges from the reference (catches e.g. fp32 norms kept inside a bf16 checkpoint). +5. Runs `print_config_diff` — prints every flat-key difference between the reference Hub config and the tiny model's config (for debugging scale-downs). +6. Pushes the model, tokenizer/processor, generation config, and model card to the Hub in a single commit. -If the repo already exists on the Hub, the push is skipped (pass `force=True` in `push_to_hub(...)` to overwrite). +If the repo already exists on the Hub, the push is skipped by default. Pass `--create-pr` to open a PR against the existing repo instead: + +```bash +python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm --create-pr +``` + +Direct overwrites of `main` aren't supported — update via `--create-pr` and merge the PR on the Hub.
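+For intuition, the dtype check in step 4 can be approximated with `huggingface_hub`'s safetensors metadata API, which reads checkpoint headers over HTTP without downloading the weights. A minimal sketch, assuming `get_safetensors_metadata` (the real helper in `_common.py` may differ):
+
+```python
+# Rough sketch only, not the actual check_dtype_pattern implementation.
+from huggingface_hub import get_safetensors_metadata
+
+
+def reference_dtypes(repo_id: str) -> dict[str, str]:
+    # Map each tensor name in the reference checkpoint to its dtype string
+    # (e.g. "BF16", "F32") by parsing the safetensors headers remotely.
+    metadata = get_safetensors_metadata(repo_id)
+    return {
+        name: info.dtype
+        for file_meta in metadata.files_metadata.values()
+        for name, info in file_meta.tensors.items()
+    }
+```
+
+A tiny model's `named_parameters()` dtypes can then be compared against these per-tensor patterns to spot divergences.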
## Version pinning diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 914df2bd9e4..2d72fc84109 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -187,7 +187,7 @@ def _parse_args(): return args -def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, force=False, create_pr=None): +def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, create_pr=None): if create_pr is None: create_pr = _parse_args().create_pr @@ -201,7 +201,7 @@ def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, f repo_id += f"-{suffix}" exists = api.repo_exists(repo_id) - if exists and not force and not create_pr: + if exists and not create_pr: print(f"Model {repo_id} already exists, skipping (pass --create-pr to open a PR)") return From b2cf6034cc5ef3a1dc7d658fa6031a32483f0afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:30:52 +0000 Subject: [PATCH 12/20] nit commit message --- scripts/generate_tiny_models/_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 2d72fc84109..45f95311e36 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -228,7 +228,7 @@ def push_to_hub(model, tokenizer, generation_config, prefix=None, suffix=None, c commit_info = api.create_commit( repo_id=repo_id, operations=operations, - commit_message=f"Upload tiny {model.__class__.__name__}", + commit_message=f"Upload {model.__class__.__name__}", create_pr=exists and create_pr, ) if commit_info.pr_url: From b4bae788923612d4a8547b9e664cfa3175df46d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:33:46 +0000 Subject: [PATCH 13/20] better --- scripts/generate_tiny_models/_common.py | 17 +++++++++-------- .../qwen3_5_for_conditional_generation.py | 9 +++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/scripts/generate_tiny_models/_common.py b/scripts/generate_tiny_models/_common.py index 45f95311e36..e9b7c5c7acc 100644 --- a/scripts/generate_tiny_models/_common.py +++ b/scripts/generate_tiny_models/_common.py @@ -24,7 +24,7 @@ from huggingface_hub import CommitOperationAdd, HfApi, ModelCard from packaging.version import Version from torch import nn -from transformers import AutoConfig +from transformers import AutoConfig, ProcessorMixin ORGANIZATION = "trl-internal-testing" @@ -59,7 +59,7 @@ def smoke_test(model, tokenizer_or_processor=None): model.eval() device = next(model.parameters()).device - if tokenizer_or_processor is not None and hasattr(tokenizer_or_processor, "image_processor"): + if isinstance(tokenizer_or_processor, ProcessorMixin): # VLM path: build a dummy (image, text) input via the processor. 
from PIL import Image @@ -89,14 +89,15 @@ def smoke_test(model, tokenizer_or_processor=None): with torch.no_grad(): out = model(**inputs) - logits = getattr(out, "logits", None) - if logits is None: - logits = getattr(out, "last_hidden_state", None) - if logits is None: + if "logits" in out: + output_tensor = out["logits"] + elif "last_hidden_state" in out: + output_tensor = out["last_hidden_state"] + else: raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: no logits or last_hidden_state on output") - if torch.isnan(logits).any(): + if torch.isnan(output_tensor).any(): raise RuntimeError(f"[smoke_test] {model.__class__.__name__}: NaN in forward output") - print(f"[smoke_test] {model.__class__.__name__}: OK (output shape {tuple(logits.shape)})") + print(f"[smoke_test] {model.__class__.__name__}: OK (output shape {tuple(output_tensor.shape)})") def _flatten(d, prefix=""): diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py index 2caa177b742..ce0d0dfd0e7 100644 --- a/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_5_for_conditional_generation.py @@ -52,10 +52,11 @@ model = Qwen3_5ForConditionalGeneration(config).to(dtype=torch.bfloat16) # Restore float32 for linear-attn weights that the upstream model keeps in fp32. -for layer in model.model.language_model.layers: - if hasattr(layer, "linear_attn"): - layer.linear_attn.A_log.data = layer.linear_attn.A_log.data.float() - layer.linear_attn.norm.weight.data = layer.linear_attn.norm.weight.data.float() +for i, layer_type in enumerate(config.text_config.layer_types): + if layer_type == "linear_attention": + linear_attn = model.model.language_model.layers[i].linear_attn + linear_attn.A_log.data = linear_attn.A_log.data.float() + linear_attn.norm.weight.data = linear_attn.norm.weight.data.float() smoke_test(model, processor) check_dtype_pattern(MODEL_ID, model) From 0b7fa20434c7a61c1c6e7071338548991471374e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 24 Apr 2026 20:52:41 +0000 Subject: [PATCH 14/20] fix generation config peft --- .../for_causal_lm/peft_qwen3_for_causal_lm.py | 5 ++--- .../for_causal_lm/peft_qwen3_for_causal_lm_2.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py index 7f647facaf8..7688639f873 100644 --- a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm.py @@ -13,7 +13,7 @@ # limitations under the License. 
from peft import LoraConfig, get_peft_model -from transformers import GenerationConfig, Qwen3ForCausalLM +from transformers import Qwen3ForCausalLM from .._common import check_transformers_version, push_to_hub, smoke_test @@ -25,6 +25,5 @@ model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto") model = get_peft_model(model, LoraConfig()) -generation_config = GenerationConfig.from_pretrained(BASE) smoke_test(model, None) -push_to_hub(model, None, generation_config, "tiny") +push_to_hub(model, None, None, "tiny") diff --git a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py index 08fb10ce0a9..cf84cfd7dee 100644 --- a/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py +++ b/scripts/generate_tiny_models/for_causal_lm/peft_qwen3_for_causal_lm_2.py @@ -15,7 +15,7 @@ # Same model class as peft_qwen3_for_causal_lm.py, with different (random) LoRA weights. from peft import LoraConfig, get_peft_model -from transformers import GenerationConfig, Qwen3ForCausalLM +from transformers import Qwen3ForCausalLM from .._common import check_transformers_version, push_to_hub, smoke_test @@ -27,6 +27,5 @@ model = Qwen3ForCausalLM.from_pretrained(BASE, dtype="auto") model = get_peft_model(model, LoraConfig()) -generation_config = GenerationConfig.from_pretrained(BASE) smoke_test(model, None) -push_to_hub(model, None, generation_config, "tiny", "2") +push_to_hub(model, None, None, "tiny", "2") From 39bafd43446d9bc051600948a396e9a1f3059d5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:16:48 -0400 Subject: [PATCH 15/20] Qwen3.6 integration (#5642) --- docs/source/chat_templates.md | 6 +- docs/source/grpo_trainer.md | 1 + tests/test_chat_template_utils.py | 18 +++ tests/test_data_utils.py | 7 + tests/test_dpo_trainer.py | 7 + tests/test_grpo_trainer.py | 7 + tests/test_rloo_trainer.py | 7 + tests/test_sft_trainer.py | 7 + trl/chat_template_utils.py | 17 ++- trl/chat_templates/README.md | 8 ++ trl/chat_templates/qwen3_6.jinja | 154 ++++++++++++++++++++ trl/chat_templates/qwen3_6_training.jinja | 162 ++++++++++++++++++++++ 12 files changed, 397 insertions(+), 4 deletions(-) create mode 100644 trl/chat_templates/qwen3_6.jinja create mode 100644 trl/chat_templates/qwen3_6_training.jinja diff --git a/docs/source/chat_templates.md b/docs/source/chat_templates.md index 94475e9a29c..964578298df 100644 --- a/docs/source/chat_templates.md +++ b/docs/source/chat_templates.md @@ -20,7 +20,7 @@ TRL ships patched templates under [`trl/chat_templates/`](https://github.com/hug ## Supported model families -TRL stores reference copies of the original templates so it can identify supported models at init and swap in a training template when needed. The following families are recognized: DeepSeek-V3, Gemma, GLM-4-MoE, GPT-OSS, Llama 3 / 3.1 / 3.2, Qwen2.5, Qwen3, Qwen3-VL, Qwen3.5. +TRL stores reference copies of the original templates so it can identify supported models at init and swap in a training template when needed. The following families are recognized: DeepSeek-V3, Gemma, GLM-4-MoE, GPT-OSS, Llama 3 / 3.1 / 3.2, Qwen2.5, Qwen3, Qwen3-VL, Qwen3.5, Qwen3.6. ## Training templates @@ -98,6 +98,10 @@ Patched Qwen2.5 template. 
Diff vs `qwen2_5.jinja`: Wrap assistant message output with `{% generation %}` / `{% endgeneration %}` so that `return_assistant_tokens_mask=True` produces correct masks for SFT assistant-only loss. +### `qwen3_6_training.jinja` + +Patched Qwen3.6 template. Diff vs `qwen3_6.jinja`: same set of changes as `qwen3_training.jinja` — require both `<think>` and `</think>` to be present before parsing, drop the `loop.index0 > ns.last_query_index` conditional so the thinking block is always emitted (prefix-preservation), and wrap assistant output with `{% generation %}` / `{% endgeneration %}` markers for SFT assistant-only loss. + ## Related utilities See [Chat Template Utilities](chat_template_utils) for the helper functions ([`clone_chat_template`], [`is_chat_template_prefix_preserving`], [`get_training_chat_template`]) that operate on these templates. diff --git a/docs/source/grpo_trainer.md b/docs/source/grpo_trainer.md index a4c9108c703..98133d1a441 100644 --- a/docs/source/grpo_trainer.md +++ b/docs/source/grpo_trainer.md @@ -751,6 +751,7 @@ Tested with: - [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B` - [**Qwen3-VL**](https://huggingface.co/collections/Qwen/qwen3-vl) — e.g., `Qwen/Qwen3-VL-2B-Instruct` - [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B` +- [**Qwen3.6**](https://huggingface.co/collections/Qwen/qwen36) — e.g., `Qwen/Qwen3.6-35B-A3B` > [!TIP] > Compatibility with all LLMs is not guaranteed. If you believe a model should be supported, feel free to open an issue on GitHub — or better yet, submit a pull request with the required changes. diff --git a/tests/test_chat_template_utils.py b/tests/test_chat_template_utils.py index d813caec447..9ce97297a3b 100644 --- a/tests/test_chat_template_utils.py +++ b/tests/test_chat_template_utils.py @@ -152,6 +152,7 @@ def test_add_response_schema(self, tokenizer_name): [ pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"), pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"), + pytest.param("trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", id="qwen36"), ], ) def test_add_response_schema_vlm(self, processor_name): @@ -222,6 +223,14 @@ class TestSupportsToolCalling: reason="Qwen3.5 tokenizer requires transformers>=5.0.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + id="qwen36", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.0.0"), + reason="Qwen3.5 tokenizer requires transformers>=5.0.0", + ), + ), ], ) def test_supports_tool_calling(self, model_id): @@ -444,6 +453,14 @@ def test_prefix_preserving_template_processor(self): pytest.param("trl-internal-testing/tiny-Phi3ForCausalLM-3", id="phi3"), pytest.param("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5", id="qwen2.5"), pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + id="qwen36", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.0.0"), + reason="Qwen3.5 tokenizer requires transformers>=5.0.0", + ), + ), ], ) class TestGetTrainingChatTemplate: @@ -648,6 +665,7 @@ def test_assistant_masks_multi_turn(self, tokenizer_name): pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"), pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"), + pytest.param("trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", id="qwen36"), pytest.param( "trl-internal-testing/tiny-Gemma4ForConditionalGeneration", id="gemma4", diff --git a/tests/test_data_utils.py b/tests/test_data_utils.py index 8c77ab37e00..3498a8f1850 100644 --- a/tests/test_data_utils.py +++ b/tests/test_data_utils.py @@ -553,6 +553,13 @@ class TestApplyChatTemplate(TrlTestCase): reason="Qwen3.5 tokenizer requires transformers>=5.0.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.0.0"), + reason="Qwen3.5 tokenizer requires transformers>=5.0.0", + ), + ), ] conversational_examples = [ diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index ddff0f29cdd..5000c0f449a 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -1051,6 +1051,13 @@ def test_tag_added_peft(self): reason="Qwen3.5 models were introduced in transformers-5.2.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.2.0"), + reason="Qwen3.5 models were introduced in transformers-5.2.0", + ), + ), ], ) @require_vision diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py index bbb861eec14..8f3e14660d8 100644 --- a/tests/test_grpo_trainer.py +++ b/tests/test_grpo_trainer.py @@ -1939,6 +1939,13 @@ def test_prepare_input_called_with_correct_data(self): reason="Qwen3.5 models were introduced in transformers-5.2.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.2.0"), + reason="Qwen3.5 models were introduced in transformers-5.2.0", + ), + ), # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", seems not to support bf16 properly ], ) diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py index 39f766ed5a9..1bea5a80d7d 100644 --- a/tests/test_rloo_trainer.py +++ b/tests/test_rloo_trainer.py @@ -1331,6 +1331,13 @@ def test_prepare_input_called_with_correct_data(self): reason="Qwen3.5 models were introduced in transformers-5.2.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.2.0"), + reason="Qwen3.5 models were introduced in transformers-5.2.0", + ), + ), # "trl-internal-testing/tiny-SmolVLMForConditionalGeneration", seems not to support bf16 properly ], ) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 95e06238ce5..2c10d2b7e94 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -1660,6 +1660,13 @@ def test_tag_added_peft(self): reason="Qwen3.5 models were introduced in transformers-5.2.0", ), ), + pytest.param( + "trl-internal-testing/tiny-Qwen3_5MoeForConditionalGeneration-3.6", + marks=pytest.mark.skipif( + Version(transformers.__version__) < Version("5.2.0"), + reason="Qwen3.5 models were introduced in transformers-5.2.0", + ), + ), ], ) @require_vision diff --git a/trl/chat_template_utils.py b/trl/chat_template_utils.py index b754b5b0316..fccb0a578e3 100644 --- a/trl/chat_template_utils.py +++ b/trl/chat_template_utils.py @@ -332,6 +332,8 @@ def clone_chat_template( qwen3_5_chat_template_4b_and_above = 
(_CHAT_TEMPLATES_DIR / "qwen3_5_4b_and_above.jinja").read_text() +qwen3_6_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3_6.jinja").read_text() + ProcessingClassT = TypeVar("ProcessingClassT", PreTrainedTokenizerBase, ProcessorMixin) @@ -384,7 +386,11 @@ def add_response_schema(processing_class: ProcessingClassT) -> ProcessingClassT: tokenizer.response_schema = llama3_schema elif chat_template in [qwen3_chat_template, qwen3_vl_chat_template]: tokenizer.response_schema = qwen3_schema - elif chat_template in [qwen3_5_chat_template_2b_and_below, qwen3_5_chat_template_4b_and_above]: + elif chat_template in [ + qwen3_5_chat_template_2b_and_below, + qwen3_5_chat_template_4b_and_above, + qwen3_6_chat_template, + ]: tokenizer.response_schema = qwen3_5_schema else: raise ValueError( @@ -539,6 +545,8 @@ def is_chat_template_prefix_preserving(processing_class: PreTrainedTokenizerBase qwen3_training_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3_training.jinja").read_text() +qwen3_6_training_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3_6_training.jinja").read_text() + def get_training_chat_template(tokenizer: PreTrainedTokenizerBase) -> str | None: r""" @@ -546,8 +554,8 @@ def get_training_chat_template(tokenizer: PreTrainedTokenizerBase) -> str | None Returns a patched chat template that is prefix-preserving and includes `{%% generation %%}` / `{%% endgeneration %%}` markers for assistant-only loss masking. Returns `None` if the tokenizer's template already satisfies both - requirements. Currently DeepSeek-V3, Gemma, Gemma2, GLM-4-MoE, GPT-OSS, LLaMA 3, Phi-3, Qwen2.5, and Qwen3 are - supported. + requirements. Currently DeepSeek-V3, Gemma, Gemma2, GLM-4-MoE, GPT-OSS, LLaMA 3, Phi-3, Qwen2.5, Qwen3, and Qwen3.6 + are supported. Args: tokenizer (`PreTrainedTokenizerBase`): @@ -622,6 +630,9 @@ def get_training_chat_template(tokenizer: PreTrainedTokenizerBase) -> str | None if tokenizer.chat_template == qwen3_chat_template: return qwen3_training_chat_template + if tokenizer.chat_template == qwen3_6_chat_template: + return qwen3_6_training_chat_template + raise ValueError( "The tokenizer's chat template is not training-compatible (missing prefix-preservation or " "`{% generation %}` markers) and patching is not supported for this template. " diff --git a/trl/chat_templates/README.md b/trl/chat_templates/README.md index 2c07893c07d..9b9f0243081 100644 --- a/trl/chat_templates/README.md +++ b/trl/chat_templates/README.md @@ -53,6 +53,10 @@ Original Qwen3-VL chat template. Unlike text-only Qwen3, this template is alread Original Qwen3.5 chat templates. +### `qwen3_6.jinja` + +Original Qwen3.6 chat template (shared across `Qwen3.6-27B`, `Qwen3.6-35B-A3B`, and their FP8 variants). Differs from `qwen3_5_4b_and_above.jinja` by adding a `preserve_thinking` flag and tweaking how non-string tool-call argument values are stringified. + ## Training templates Patched templates that fix training-specific issues. Swapped in at init when tools are enabled (GRPO) or when `assistant_only_loss=True` (SFT). @@ -135,3 +139,7 @@ Always include the thinking block regardless of message position. The original c ``` Wrap assistant message output with `{% generation %}` / `{% endgeneration %}` so that `return_assistant_tokens_mask=True` produces correct masks for SFT assistant-only loss. + +### `qwen3_6_training.jinja` + +Patched Qwen3.6 template. 
Same diff as `qwen3_training.jinja` (require both `<think>` and `</think>` before parsing, drop the `loop.index0 > ns.last_query_index` conditional so the thinking block is always emitted, wrap assistant output in `{% generation %}` / `{% endgeneration %}`), applied to the Qwen3.6 base template. diff --git a/trl/chat_templates/qwen3_6.jinja b/trl/chat_templates/qwen3_6.jinja new file mode 100644 index 00000000000..a8755d827c0 --- /dev/null +++ b/trl/chat_templates/qwen3_6.jinja @@ -0,0 +1,154 @@ +{%- set image_count = namespace(value=0) %} +{%- set video_count = namespace(value=0) %} +{%- macro render_content(content, do_vision_count, is_system_content=false) %} + {%- if content is string %} + {{- content }} + {%- elif content is iterable and content is not mapping %} + {%- for item in content %} + {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain images.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set image_count.value = image_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Picture ' ~ image_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|image_pad|><|vision_end|>' }} + {%- elif 'video' in item or item.type == 'video' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain videos.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set video_count.value = video_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Video ' ~ video_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|video_pad|><|vision_end|>' }} + {%- elif 'text' in item %} + {{- item.text }} + {%- else %} + {{- raise_exception('Unexpected item type in content.') }} + {%- endif %} + {%- endfor %} + {%- elif content is none or content is undefined %} + {{- '' }} + {%- else %} + {{- raise_exception('Unexpected content type.') }} + {%- endif %} +{%- endmacro %} +{%- if not messages %} + {{- raise_exception('No messages provided.') }} +{%- endif %} +{%- if tools and tools is iterable and tools is not mapping %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou have access to the following functions:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n" }} + {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {%- if content %} + {{- '\n\n' + content }} + {%- endif %} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- else %} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if
ns.multi_step_tool and message.role == "user" %} + {%- set content = render_content(message.content, false)|trim %} + {%- if not(content.startswith('') and content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if ns.multi_step_tool %} + {{- raise_exception('No user query found in messages.') }} +{%- endif %} +{%- for message in messages %} + {%- set content = render_content(message.content, true)|trim %} + {%- if message.role == "system" %} + {%- if not loop.first %} + {{- raise_exception('System message must be at the beginning.') }} + {%- endif %} + {%- elif message.role == "user" %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- set reasoning_content = reasoning_content|trim %} + {%- if (preserve_thinking is defined and preserve_thinking is true) or (loop.index0 > ns.last_query_index) %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if loop.first %} + {%- if content|trim %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n\n' }} + {%- endif %} + {%- else %} + {{- '\n\n\n' }} + {%- endif %} + {%- if tool_call.arguments is defined %} + {%- for args_name, args_value in tool_call.arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | string if args_value is string else args_value | tojson | safe %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>\n' }} + {%- elif loop.last %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- else %} + {{- raise_exception('Unexpected message role.') }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/trl/chat_templates/qwen3_6_training.jinja b/trl/chat_templates/qwen3_6_training.jinja new file mode 100644 index 00000000000..e4e705768d7 --- /dev/null +++ b/trl/chat_templates/qwen3_6_training.jinja @@ -0,0 +1,162 @@ +{#- Training variant of the Qwen3.6 chat template (see qwen3_6.jinja for the original). + Modifications vs the original: + - {%- if '' in content %} → {%- if '' in content and '' in content %} + Always check for both tags to avoid edge cases where the model generates only one tag. 
+ - Removed the loop.index0 > ns.last_query_index conditional; always include thinking block. + This makes the template prefix-preserving for the [user, assistant] → [user, assistant, tool] transition. + - Added {% generation %} / {% endgeneration %} around assistant message output to support + assistant-only loss masking in SFT training. +-#} +{%- set image_count = namespace(value=0) %} +{%- set video_count = namespace(value=0) %} +{%- macro render_content(content, do_vision_count, is_system_content=false) %} + {%- if content is string %} + {{- content }} + {%- elif content is iterable and content is not mapping %} + {%- for item in content %} + {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain images.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set image_count.value = image_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Picture ' ~ image_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|image_pad|><|vision_end|>' }} + {%- elif 'video' in item or item.type == 'video' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain videos.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set video_count.value = video_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Video ' ~ video_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|video_pad|><|vision_end|>' }} + {%- elif 'text' in item %} + {{- item.text }} + {%- else %} + {{- raise_exception('Unexpected item type in content.') }} + {%- endif %} + {%- endfor %} + {%- elif content is none or content is undefined %} + {{- '' }} + {%- else %} + {{- raise_exception('Unexpected content type.') }} + {%- endif %} +{%- endmacro %} +{%- if not messages %} + {{- raise_exception('No messages provided.') }} +{%- endif %} +{%- if tools and tools is iterable and tools is not mapping %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou have access to the following functions:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n" }} + {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {%- if content %} + {{- '\n\n' + content }} + {%- endif %} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- else %} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" %} + {%- set content = render_content(message.content, false)|trim %} + {%- if 
not(content.startswith('') and content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if ns.multi_step_tool %} + {{- raise_exception('No user query found in messages.') }} +{%- endif %} +{%- for message in messages %} + {%- set content = render_content(message.content, true)|trim %} + {%- if message.role == "system" %} + {%- if not loop.first %} + {{- raise_exception('System message must be at the beginning.') }} + {%- endif %} + {%- elif message.role == "user" %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content and '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- set reasoning_content = reasoning_content|trim %} + {{- '<|im_start|>' + message.role + '\n' }} + {%- generation %} + {{- '\n' + reasoning_content + '\n\n\n' + content }} + {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if loop.first %} + {%- if content|trim %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n\n' }} + {%- endif %} + {%- else %} + {{- '\n\n\n' }} + {%- endif %} + {%- if tool_call.arguments is defined %} + {%- for args_name, args_value in tool_call.arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | string if args_value is string else args_value | tojson | safe %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- endgeneration %} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>\n' }} + {%- elif loop.last %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- else %} + {{- raise_exception('Unexpected message role.') }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file From 07e65d736e7c8a9f90cd68040cd04eca8b3bb045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:28:01 -0400 Subject: [PATCH 16/20] Release: v1.3 (#5647) --- .github/workflows/tests_latest.yml | 2 +- CITATION.cff | 2 +- VERSION | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests_latest.yml b/.github/workflows/tests_latest.yml index 54debaba39d..94265905b40 100644 --- a/.github/workflows/tests_latest.yml +++ b/.github/workflows/tests_latest.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Git checkout uses: actions/checkout@v6 - with: { ref: v1.2-release } + with: { ref: v1.3-release } - name: Set up Python 3.12 uses: actions/setup-python@v6 diff --git a/CITATION.cff b/CITATION.cff index 
c78b65d38fd..619482508c4 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -37,5 +37,5 @@ keywords: - language model alignment - post-training license: Apache-2.0 -version: '1.2' +version: '1.3' date-released: '2020-03-27' diff --git a/VERSION b/VERSION index 14c65ab0d00..589268e6fed 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.3.0.dev0 \ No newline at end of file +1.3.0 \ No newline at end of file From 7198c14b6e3ceabecf1af74053b3c9b10051c639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:29:30 -0400 Subject: [PATCH 17/20] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20Bump=20dev=20version?= =?UTF-8?q?=20(#5648)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 589268e6fed..b58da95673d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.3.0 \ No newline at end of file +1.4.0.dev0 \ No newline at end of file From 71b82192d5192a6d860b4552c8ac17a3c21dec23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 27 Apr 2026 16:21:55 +0000 Subject: [PATCH 18/20] Add Qwen3.6 model generation script with updated configuration --- .../qwen3_6_for_conditional_generation.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py diff --git a/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py new file mode 100644 index 00000000000..a0f5bef6e7a --- /dev/null +++ b/scripts/generate_tiny_models/for_conditional_generation/qwen3_6_for_conditional_generation.py @@ -0,0 +1,63 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Notes: +# - Qwen3.6 reuses the Qwen3_5Moe class with extra MoE config fields +# (num_experts, num_experts_per_tok, moe_intermediate_size, shared_expert_intermediate_size). +# - Same layer_types/full_attention_interval workaround as Qwen3.5: tiny models (2 layers) need +# one full-attention layer to keep the dynamic cache happy. +# - The vision config expects `depth`/`num_heads` (not `num_hidden_layers`/`num_attention_heads`). +# - Unlike Qwen3.5, Qwen3.6 stores linear-attn weights in bf16, so no float32 cast is needed. 
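+# - Rough intuition for the MoE fields: with num_experts=4 and num_experts_per_tok=2, the router
+#   picks 2 of the 4 tiny expert MLPs per token, alongside the shared expert, so the routing and
+#   top-k dispatch code paths are exercised while the model stays tiny.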
+ +import torch +from transformers import AutoConfig, AutoProcessor, GenerationConfig, Qwen3_5MoeForConditionalGeneration + +from .._common import check_dtype_pattern, check_transformers_version, print_config_diff, push_to_hub, smoke_test + + +TRANSFORMERS_VERSION = "5.2.0" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3.6-35B-A3B" + +processor = AutoProcessor.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) + +text_config = { + "num_hidden_layers": 2, + "hidden_size": 16, + "num_attention_heads": 4, + "num_key_value_heads": 2, + "layer_types": ["linear_attention", "full_attention"], + "full_attention_interval": 2, + "num_experts": 4, + "num_experts_per_tok": 2, + "moe_intermediate_size": 32, + "shared_expert_intermediate_size": 32, +} +vision_config = { + "hidden_size": 16, + "depth": 2, + "num_heads": 4, + "intermediate_size": 32, + "out_hidden_size": 16, +} + +config = AutoConfig.from_pretrained(MODEL_ID, text_config=text_config, vision_config=vision_config) +model = Qwen3_5MoeForConditionalGeneration(config).to(dtype=torch.bfloat16) + +smoke_test(model, processor) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, processor, generation_config, "tiny", "3.6") From 545e5e95231ce5cad304463f9de956d5947974be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 27 Apr 2026 16:24:32 +0000 Subject: [PATCH 19/20] merge main From 4730fecada9be0370f9e271b93ca1cc09cc4885d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 28 Apr 2026 00:14:39 +0000 Subject: [PATCH 20/20] Qwen3 Instruct-2507 --- .../qwen3_for_causal_lm_instruct_2507.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py diff --git a/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py new file mode 100644 index 00000000000..6f84c69005b --- /dev/null +++ b/scripts/generate_tiny_models/for_causal_lm/qwen3_for_causal_lm_instruct_2507.py @@ -0,0 +1,50 @@ +# Copyright 2020-2026 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Qwen3-4B-Instruct-2507 ships the non-thinking chat template, distinct from the default Qwen3 template. 
+ +import torch +from transformers import AutoTokenizer, GenerationConfig, Qwen3Config, Qwen3ForCausalLM + +from .._common import ( + check_dtype_pattern, + check_transformers_version, + init_weights_tiny_model, + print_config_diff, + push_to_hub, + smoke_test, +) + + +TRANSFORMERS_VERSION = "4.56.2" +check_transformers_version(TRANSFORMERS_VERSION) + +MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507" + +tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) +generation_config = GenerationConfig.from_pretrained(MODEL_ID) +config = Qwen3Config( + vocab_size=len(tokenizer.vocab), + hidden_size=8, + num_attention_heads=4, + num_key_value_heads=2, + num_hidden_layers=2, + intermediate_size=32, +) +model = Qwen3ForCausalLM(config).to(dtype=torch.bfloat16) +init_weights_tiny_model(model) +smoke_test(model, tokenizer) +check_dtype_pattern(MODEL_ID, model) +print_config_diff(MODEL_ID, model) +push_to_hub(model, tokenizer, generation_config, "tiny", "Instruct-2507")
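Following the invocation pattern documented in the README, the new script would be run as a module; per the `"tiny"` / `"Instruct-2507"` prefix/suffix arguments to `push_to_hub`, it should target a repo named something like `trl-internal-testing/tiny-Qwen3ForCausalLM-Instruct-2507` (inferred from the naming pattern, not confirmed here):

```bash
# First run creates the tiny repo on the Hub; reruns skip the push
# unless --create-pr is passed to open a PR against the existing repo.
python -m scripts.generate_tiny_models.for_causal_lm.qwen3_for_causal_lm_instruct_2507 --create-pr
```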