diff --git a/brainscore_language/models/opt2_7b/__init__.py b/brainscore_language/models/opt2_7b/__init__.py
new file mode 100644
index 00000000..27321a78
--- /dev/null
+++ b/brainscore_language/models/opt2_7b/__init__.py
@@ -0,0 +1,12 @@
+from brainscore_language import model_registry
+from brainscore_language import ArtificialSubject
+from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
+
+# OPT 2.7B: 32 transformer layers (indices 0-31), hidden size 2560.
+# The last layer (index 31) is the default language-system mapping pending benchmark-driven selection.
+model_registry['opt-2.7b'] = lambda: HuggingfaceSubject(  # lambda defers construction until the entry is called
+    model_id='facebook/opt-2.7b',
+    region_layer_mapping={
+        ArtificialSubject.RecordingTarget.language_system: 'model.decoder.layers.31'
+    },
+)
diff --git a/brainscore_language/models/opt2_7b/metadata.yml b/brainscore_language/models/opt2_7b/metadata.yml
new file mode 100644
index 00000000..e2cfac5e
--- /dev/null
+++ b/brainscore_language/models/opt2_7b/metadata.yml
@@ -0,0 +1,15 @@
+models:
+  opt2_7b:
+    architecture: DCNN  # NOTE(review): placeholder per extra_notes; __init__.py describes transformer layers - confirm intended value
+    model_family: opt2_7b
+    total_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    trainable_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    total_layers: 55  # NOTE(review): placeholder per extra_notes; __init__.py documents 32 transformer layers
+    trainable_layers: 40  # NOTE(review): placeholder per extra_notes
+    model_size_mb: 1202  # NOTE(review): placeholder per extra_notes
+    training_dataset: null
+    task_specialization: null
+    brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/opt2_7b
+    huggingface_link: null  # NOTE(review): registry uses model_id 'facebook/opt-2.7b'; presumably a hub link could go here - confirm
+    extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
+    runnable: true
diff --git a/brainscore_language/models/opt2_7b/test.py b/brainscore_language/models/opt2_7b/test.py
new file mode 100644
index 00000000..de73410f
--- /dev/null
+++ b/brainscore_language/models/opt2_7b/test.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pytest
+
+from brainscore_language import load_model
+from brainscore_language.artificial_subject import ArtificialSubject
+
+
+@pytest.mark.memory_intense
+def test_load_model():
+    """Loading 'opt-2.7b' through load_model returns a non-None object."""
+    model = load_model('opt-2.7b')
+    assert model is not None
+
+
+@pytest.mark.memory_intense
+def test_identifier():
+    model = load_model('opt-2.7b')
+    assert model.identifier == 'opt-2.7b'  # read as an attribute; expected to equal the registry key
+
+
+@pytest.mark.memory_intense
+def test_neural():
+    """Language-system recording yields one presentation per input text and 2560 neuroids."""
+    model = load_model('opt-2.7b')
+    text = ['the quick brown fox', 'jumps over', 'the lazy dog']
+    model.start_neural_recording(
+        recording_target=ArtificialSubject.RecordingTarget.language_system,
+        recording_type=ArtificialSubject.RecordingType.fMRI,
+    )
+    representations = model.digest_text(text)['neural']
+    assert len(representations['presentation']) == 3  # one entry per element of `text`
+    np.testing.assert_array_equal(representations['stimulus'], text)
+    assert len(representations['neuroid']) == 2560  # hidden size stated in this model's __init__.py
+
+
+
diff --git a/brainscore_language/models/phi2/test.py b/brainscore_language/models/phi2/test.py
index bad59f9d..25dbc977 100644
--- a/brainscore_language/models/phi2/test.py
+++ b/brainscore_language/models/phi2/test.py
@@ -15,7 +15,7 @@ def test_load_model():
 @pytest.mark.memory_intense
 def test_identifier():
     model = load_model('phi-2')
-    assert model.identifier() == 'microsoft/phi-2'
+    assert model.identifier == 'phi-2'
 
 
 @pytest.mark.memory_intense
diff --git a/brainscore_language/models/pythia2_8b/__init__.py b/brainscore_language/models/pythia2_8b/__init__.py
new file mode 100644
index 00000000..43d7c124
--- /dev/null
+++ b/brainscore_language/models/pythia2_8b/__init__.py
@@ -0,0 +1,12 @@
+from brainscore_language import model_registry
+from brainscore_language import ArtificialSubject
+from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
+
+# Pythia 2.8B: 32 transformer layers (indices 0-31), hidden size 2560.
+# The last layer (index 31) is the default language-system mapping pending benchmark-driven selection.
+model_registry['pythia-2.8b'] = lambda: HuggingfaceSubject(  # lambda defers construction until the entry is called
+    model_id='EleutherAI/pythia-2.8b',
+    region_layer_mapping={
+        ArtificialSubject.RecordingTarget.language_system: 'gpt_neox.layers.31'
+    },
+)
diff --git a/brainscore_language/models/pythia2_8b/metadata.yml b/brainscore_language/models/pythia2_8b/metadata.yml
new file mode 100644
index 00000000..8d3d68b4
--- /dev/null
+++ b/brainscore_language/models/pythia2_8b/metadata.yml
@@ -0,0 +1,15 @@
+models:
+  pythia2_8b:
+    architecture: DCNN  # NOTE(review): placeholder per extra_notes; __init__.py describes transformer layers - confirm intended value
+    model_family: pythia2_8b
+    total_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    trainable_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    total_layers: 55  # NOTE(review): placeholder per extra_notes; __init__.py documents 32 transformer layers
+    trainable_layers: 40  # NOTE(review): placeholder per extra_notes
+    model_size_mb: 1202  # NOTE(review): placeholder per extra_notes
+    training_dataset: null
+    task_specialization: null
+    brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/pythia2_8b
+    huggingface_link: null  # NOTE(review): registry uses model_id 'EleutherAI/pythia-2.8b'; presumably a hub link could go here - confirm
+    extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
+    runnable: true
diff --git a/brainscore_language/models/pythia2_8b/test.py b/brainscore_language/models/pythia2_8b/test.py
new file mode 100644
index 00000000..67de7297
--- /dev/null
+++ b/brainscore_language/models/pythia2_8b/test.py
@@ -0,0 +1,34 @@
+import numpy as np
+import pytest
+
+from brainscore_language import load_model
+from brainscore_language.artificial_subject import ArtificialSubject
+
+
+@pytest.mark.memory_intense
+def test_load_model():
+    """Loading 'pythia-2.8b' through load_model returns a non-None object."""
+    model = load_model('pythia-2.8b')
+    assert model is not None
+
+
+@pytest.mark.memory_intense
+def test_identifier():
+    model = load_model('pythia-2.8b')
+    assert model.identifier == 'pythia-2.8b'  # read as an attribute; expected to equal the registry key
+
+
+@pytest.mark.memory_intense
+def test_neural():
+    """Language-system recording yields one presentation per input text and 2560 neuroids."""
+    model = load_model('pythia-2.8b')
+    text = ['the quick brown fox', 'jumps over', 'the lazy dog']
+    model.start_neural_recording(
+        recording_target=ArtificialSubject.RecordingTarget.language_system,
+        recording_type=ArtificialSubject.RecordingType.fMRI,
+    )
+    representations = model.digest_text(text)['neural']
+    assert len(representations['presentation']) == 3  # one entry per element of `text`
+    np.testing.assert_array_equal(representations['stimulus'], text)
+    assert len(representations['neuroid']) == 2560  # hidden size stated in this model's __init__.py
+
diff --git a/brainscore_language/models/tinyllama1_1b/__init__.py b/brainscore_language/models/tinyllama1_1b/__init__.py
new file mode 100644
index 00000000..a95b4810
--- /dev/null
+++ b/brainscore_language/models/tinyllama1_1b/__init__.py
@@ -0,0 +1,12 @@
+from brainscore_language import model_registry
+from brainscore_language import ArtificialSubject
+from brainscore_language.model_helpers.huggingface import HuggingfaceSubject
+
+# TinyLlama 1.1B (Chat-v1.0 checkpoint, see model_id): 22 transformer layers (indices 0-21), hidden size 2048.
+# The last layer (index 21) is the default language-system mapping pending benchmark-driven selection.
+model_registry['tinyllama-1.1b'] = lambda: HuggingfaceSubject(  # lambda defers construction until the entry is called
+    model_id='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
+    region_layer_mapping={
+        ArtificialSubject.RecordingTarget.language_system: 'model.layers.21'
+    },
+)
diff --git a/brainscore_language/models/tinyllama1_1b/metadata.yml b/brainscore_language/models/tinyllama1_1b/metadata.yml
new file mode 100644
index 00000000..3a40943a
--- /dev/null
+++ b/brainscore_language/models/tinyllama1_1b/metadata.yml
@@ -0,0 +1,15 @@
+models:
+  tinyllama1_1b:
+    architecture: DCNN  # NOTE(review): placeholder per extra_notes; __init__.py describes transformer layers - confirm intended value
+    model_family: tinyllama1_1b
+    total_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    trainable_parameter_count: 1234567  # NOTE(review): placeholder per extra_notes, not a measured count
+    total_layers: 55  # NOTE(review): placeholder per extra_notes; __init__.py documents 22 transformer layers
+    trainable_layers: 40  # NOTE(review): placeholder per extra_notes
+    model_size_mb: 1202  # NOTE(review): placeholder per extra_notes
+    training_dataset: null
+    task_specialization: null
+    brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/tinyllama1_1b
+    huggingface_link: null  # NOTE(review): registry uses model_id 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'; presumably a hub link could go here - confirm
+    extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
+    runnable: true
diff --git a/brainscore_language/models/tinyllama1_1b/test.py b/brainscore_language/models/tinyllama1_1b/test.py
new file mode 100644
index 00000000..6e05240d
--- /dev/null
+++ b/brainscore_language/models/tinyllama1_1b/test.py
@@ -0,0 +1,35 @@
+import numpy as np
+import pytest
+
+from brainscore_language import load_model
+from brainscore_language.artificial_subject import ArtificialSubject
+
+
+@pytest.mark.memory_intense
+def test_load_model():
+    """Loading 'tinyllama-1.1b' through load_model returns a non-None object."""
+    model = load_model('tinyllama-1.1b')
+    assert model is not None
+
+
+@pytest.mark.memory_intense
+def test_identifier():
+    model = load_model('tinyllama-1.1b')
+    assert model.identifier == 'tinyllama-1.1b'  # read as an attribute; expected to equal the registry key
+
+
+@pytest.mark.memory_intense
+def test_neural():
+    """Language-system recording yields one presentation per input text and 2048 neuroids."""
+    model = load_model('tinyllama-1.1b')
+    text = ['the quick brown fox', 'jumps over', 'the lazy dog']
+    model.start_neural_recording(
+        recording_target=ArtificialSubject.RecordingTarget.language_system,
+        recording_type=ArtificialSubject.RecordingType.fMRI,
+    )
+    representations = model.digest_text(text)['neural']
+    assert len(representations['presentation']) == 3  # one entry per element of `text`
+    np.testing.assert_array_equal(representations['stimulus'], text)
+    assert len(representations['neuroid']) == 2048  # hidden size stated in this model's __init__.py
+
+