Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Idefics 3! #32473

Merged
merged 53 commits into from
Sep 25, 2024
Merged
Changes from 1 commit
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
842a28d
Add Idefics 3!
andimarafioti Aug 6, 2024
afce007
fixes to make both pipelines identical
andimarafioti Aug 7, 2024
3e3b31d
fix for quantized models
andimarafioti Aug 8, 2024
9c8ffc4
First pass at the review
andimarafioti Aug 8, 2024
7e3d7a6
remove vocab size from the main config (it's still in the text_config)
andimarafioti Aug 8, 2024
dd99bca
hot fix for merve
andimarafioti Aug 8, 2024
ddac9ec
Apply suggestions from code review
andimarafioti Aug 9, 2024
188bb76
re-add model_type for text_config
andimarafioti Aug 9, 2024
43fb214
remove support for old_cache
andimarafioti Aug 9, 2024
c9e0d85
remove hidden_size from main config
andimarafioti Aug 9, 2024
1b2b89c
rename idefics3 HF repo
andimarafioti Aug 9, 2024
6ff766f
few changes suggested in the PR
andimarafioti Aug 12, 2024
11c2e1a
fix to input_data_format computation
andimarafioti Aug 12, 2024
c1048ed
remove overwrite of _autoset_attn_implementation following @zucchini-…
andimarafioti Aug 12, 2024
a163564
improve example
andimarafioti Aug 12, 2024
6f0a479
few improvements from amy's review
andimarafioti Aug 12, 2024
8361fce
big change to enable processing input images as numpy arrays
andimarafioti Aug 12, 2024
32970d0
Changes to the code to uniformize processor kwargs
andimarafioti Aug 13, 2024
c504f00
image processing tests
andimarafioti Aug 13, 2024
a914e41
image processing tests fixes and some bugs they discovered
andimarafioti Aug 13, 2024
6722d13
addressed review comments from Yoni
andimarafioti Aug 13, 2024
0533eda
fix modeling tests
andimarafioti Aug 13, 2024
b034091
remove special tokens that are not special
andimarafioti Aug 15, 2024
47fb7ce
fixes tests
andimarafioti Aug 15, 2024
4032a6f
skip failing tests - they also fail for idefics2
andimarafioti Aug 21, 2024
757e834
added paper and readded the tests with multi gpu, who knows
andimarafioti Aug 27, 2024
7797279
Update docs/source/en/model_doc/idefics3.md
andimarafioti Aug 30, 2024
b478124
Apply suggestions from code review
andimarafioti Aug 30, 2024
ada6219
review amy until image_processing_idefics3
andimarafioti Aug 30, 2024
164fbe8
last comments from Amy
andimarafioti Sep 2, 2024
000c8ea
review amy
andimarafioti Sep 6, 2024
4d02e0c
Update src/transformers/models/idefics3/image_processing_idefics3.py
andimarafioti Sep 4, 2024
3bf03c2
Update src/transformers/models/idefics3/modeling_idefics3.py
andimarafioti Sep 4, 2024
57bfd51
Update docs/source/en/model_doc/idefics3.md
andimarafioti Sep 6, 2024
63b1d7f
doc improvement - amy review
andimarafioti Sep 6, 2024
6325fbc
fix runtime error during fine-tuning
andimarafioti Sep 10, 2024
76b8892
amy's review
andimarafioti Sep 16, 2024
9a20306
Update src/transformers/models/idefics3/image_processing_idefics3.py
andimarafioti Sep 16, 2024
3129920
Update src/transformers/models/idefics3/image_processing_idefics3.py
andimarafioti Sep 16, 2024
e1a10b3
Update src/transformers/models/idefics3/modeling_idefics3.py
andimarafioti Sep 16, 2024
4c3756f
ruff
andimarafioti Sep 16, 2024
fbaf07e
amy's comment on the order
andimarafioti Sep 16, 2024
87fa179
ruff ruff
andimarafioti Sep 17, 2024
23d4cf8
fix copies
andimarafioti Sep 17, 2024
9e925b9
square images when they are not splitted
andimarafioti Sep 17, 2024
215b636
ruff :(
andimarafioti Sep 17, 2024
2967974
Update src/transformers/models/idefics3/image_processing_idefics3.py
andimarafioti Sep 18, 2024
ee041bf
Update tests/models/idefics3/test_processing_idefics3.py
andimarafioti Sep 18, 2024
4aad266
fix small bug introduced in refactor
andimarafioti Sep 18, 2024
f1ae8ae
amy's image processing changes
andimarafioti Sep 19, 2024
39d88b2
fixes peft tests and ruff
andimarafioti Sep 19, 2024
383f0db
modify to_pil_image from transformers. and review from emanuele.
andimarafioti Sep 23, 2024
682b82b
add modified to_pil_image
andimarafioti Sep 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rename idefics3 HF repo
  • Loading branch information
andimarafioti committed Sep 18, 2024
commit 1b2b89c465bf76c849e509ed36a93a0d6155f3de
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@


EPILOG_TXT = """Example:
- python transformers/src/transformers/models/idefics3/convert_idefics3_weights_to_hf.py --original_model_id HuggingFaceM4/idefics3-8b --output_hub_path org/idefics3
+ python transformers/src/transformers/models/idefics3/convert_idefics3_weights_to_hf.py --original_model_id HuggingFaceM4/Idefics3-8B-Llama3 --output_hub_path org/idefics3
"""


4 changes: 2 additions & 2 deletions src/transformers/models/idefics3/modeling_idefics3.py
Original file line number Diff line number Diff line change
@@ -1114,8 +1114,8 @@ def forward(
>>> image2 = load_image("https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg")
>>> image3 = load_image("https://cdn.britannica.com/68/170868-050-8DDE8263/Golden-Gate-Bridge-San-Francisco.jpg")

- >>> processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics3-8b")
- >>> model = AutoModelForVision2Seq.from_pretrained("HuggingFaceM4/idefics3-8b", device_map="auto")
+ >>> processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
+ >>> model = AutoModelForVision2Seq.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3", device_map="auto")

>>> BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
>>> EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
2 changes: 1 addition & 1 deletion src/transformers/models/idefics3/processing_idefics3.py
Original file line number Diff line number Diff line change
@@ -145,7 +145,7 @@ def __call__(
>>> from transformers import Idefics3Processor
>>> from transformers.image_utils import load_image

- >>> processor = Idefics3Processor.from_pretrained("HuggingFaceM4/idefics3-8b", image_seq_len=2)
+ >>> processor = Idefics3Processor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3", image_seq_len=2)
>>> processor.image_processor.do_image_splitting = False # Force as False to simplify the example

>>> url1 = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
6 changes: 3 additions & 3 deletions tests/models/idefics3/test_modeling_idefics3.py
Original file line number Diff line number Diff line change
@@ -467,7 +467,7 @@ def test_resize_embeddings_untied(self):
@require_torch
class Idefics3ForConditionalGenerationIntegrationTest(unittest.TestCase):
def setUp(self):
- self.processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics3-8b-base")
+ self.processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3-base")
self.image1 = Image.open(
BytesIO(
requests.get(
@@ -493,7 +493,7 @@ def tearDown(self):
@slow
def test_integration_test(self):
model = Idefics3ForConditionalGeneration.from_pretrained(
- "HuggingFaceM4/idefics3-8b-base",
+ "HuggingFaceM4/Idefics3-8B-Llama3-base",
torch_dtype=torch.bfloat16,
device_map="auto",
)
@@ -517,7 +517,7 @@ def test_integration_test(self):
def test_integration_test_4bit(self):
# Let's make sure we test the preprocessing to replace what is used
model = Idefics3ForConditionalGeneration.from_pretrained(
- "HuggingFaceM4/idefics3-8b-base", load_in_4bit=True, device_map="auto"
+ "HuggingFaceM4/Idefics3-8B-Llama3-base", load_in_4bit=True, device_map="auto"
)

# Create pixel inputs
2 changes: 1 addition & 1 deletion tests/models/idefics3/test_processing_idefics3.py
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@
@require_vision
class Idefics3ProcessorTest(unittest.TestCase):
def setUp(self):
- self.processor = Idefics3Processor.from_pretrained("HuggingFaceM4/idefics3-8b", image_seq_len=2)
+ self.processor = Idefics3Processor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3", image_seq_len=2)
self.image1 = Image.open(
BytesIO(
requests.get(