"""
Shared pytest fixtures for all model tests.

Some HuggingFace models (e.g. Gemma) are "gated" -- they require an authenticated
token to download weights. This conftest provides a session-scoped fixture that
automatically fetches a HuggingFace read token from AWS Secrets Manager and logs in
before any model test runs. If the HF_TOKEN environment variable is already set
(e.g. via `huggingface-cli login` or manual export), the AWS lookup is skipped.

Because this file lives in brainscore_language/models/, pytest automatically applies
its fixtures to every test in this directory and all model subdirectories (gemma/,
gpt/, etc.), so individual test files don't need any auth boilerplate.
"""

import json
import os

import boto3
import pytest
from huggingface_hub import login


def _extract_token(secret: str) -> str:
    """Return the HuggingFace token contained in a Secrets Manager payload.

    The secret may be stored either as a plain string ("hf_...") or as a
    single-entry JSON object ({"key": "hf_..."}). Returns '' when no token
    can be extracted (e.g. an empty JSON object) so the caller can fail cleanly.
    """
    try:
        parsed = json.loads(secret)
    except json.JSONDecodeError:
        # not JSON at all -- treat the raw secret string as the token
        return secret.strip()
    if isinstance(parsed, dict):
        try:
            # coerce to str: JSON values are not guaranteed to be strings
            return str(next(iter(parsed.values()))).strip()
        except StopIteration:  # empty JSON object -- no token inside
            return ''
    return str(parsed).strip()


@pytest.fixture(autouse=True, scope="session")
def set_hf_token():
    """Pull HuggingFace token from AWS Secrets Manager if not already set.

    On any failure (missing AWS credentials, missing secret, empty token) the
    whole session's model tests are skipped rather than erroring, since gated
    models simply cannot be downloaded without authentication.
    """
    if os.environ.get("HF_TOKEN"):
        # already authenticated (huggingface_hub reads HF_TOKEN directly)
        return
    try:
        client = boto3.client("secretsmanager", region_name="us-east-2")
        resp = client.get_secret_value(SecretId="hugging_face_read_token")
        token = _extract_token(resp["SecretString"])
        if not token:
            # an empty token would make login() fail with a confusing error later
            raise ValueError("secret 'hugging_face_read_token' resolved to an empty token")
        os.environ["HF_TOKEN"] = token
        login(token=token)
    except Exception as e:  # broad by design: any auth failure => skip, not crash
        pytest.skip(f"HF_TOKEN not set and unable to fetch from AWS Secrets Manager: {e}")
00000000..67451db7 --- /dev/null +++ b/brainscore_language/models/gemma/__init__.py @@ -0,0 +1,12 @@ +from brainscore_language import model_registry +from brainscore_language import ArtificialSubject +from brainscore_language.model_helpers.huggingface import HuggingfaceSubject + +# Gemma 2B: 18 transformer layers, hidden size 2048. +# Layer 17 (last) chosen as default mapping pending benchmark-driven selection. +model_registry['gemma-2b'] = lambda: HuggingfaceSubject( + model_id='google/gemma-2b', + region_layer_mapping={ + ArtificialSubject.RecordingTarget.language_system: 'model.layers.17' + }, +) diff --git a/brainscore_language/models/gemma/metadata.yml b/brainscore_language/models/gemma/metadata.yml new file mode 100644 index 00000000..5dd8fabc --- /dev/null +++ b/brainscore_language/models/gemma/metadata.yml @@ -0,0 +1,15 @@ +models: + gemma: + architecture: DCNN + model_family: gemma + total_parameter_count: 1234567 + trainable_parameter_count: 1234567 + total_layers: 55 + trainable_layers: 40 + model_size_mb: 1202 + training_dataset: null + task_specialization: null + brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/gemma + huggingface_link: null + extra_notes: Temporary hardcoded metadata - will be replaced with actual generation + runnable: true diff --git a/brainscore_language/models/gemma/test.py b/brainscore_language/models/gemma/test.py new file mode 100644 index 00000000..53cf763e --- /dev/null +++ b/brainscore_language/models/gemma/test.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest + +from brainscore_language import load_model +from brainscore_language.artificial_subject import ArtificialSubject + + +@pytest.mark.memory_intense +def test_load_model(): + """Model can be loaded from the registry without errors.""" + model = load_model('gemma-2b') + assert model is not None + + +@pytest.mark.memory_intense +def test_identifier(): + model = load_model('gemma-2b') + assert model.identifier() == 
'google/gemma-2b' + + +@pytest.mark.memory_intense +def test_neural(): + """Model produces neural representations with the expected shape.""" + model = load_model('gemma-2b') + text = ['the quick brown fox', 'jumps over', 'the lazy dog'] + model.start_neural_recording( + recording_target=ArtificialSubject.RecordingTarget.language_system, + recording_type=ArtificialSubject.RecordingType.fMRI, + ) + representations = model.digest_text(text)['neural'] + assert len(representations['presentation']) == 3 + np.testing.assert_array_equal(representations['stimulus'], text) + assert len(representations['neuroid']) == 2048 + + +@pytest.mark.memory_intense +def test_next_word(): + """Model can perform next-word prediction and returns a non-empty string.""" + model = load_model('gemma-2b') + text = ['the quick brown fox', 'jumps over', 'the lazy'] + model.start_behavioral_task(task=ArtificialSubject.Task.next_word) + next_words = model.digest_text(text)['behavior'] + assert len(next_words) == 3 + for word in next_words.values: + assert isinstance(word, str) + assert len(word.strip()) > 0