Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions brainscore_language/models/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Shared pytest fixtures for all model tests.

Some HuggingFace models (e.g. Gemma) are "gated" -- they require an authenticated
token to download weights. This conftest provides a session-scoped fixture that
automatically fetches a HuggingFace read token from AWS Secrets Manager and logs in
before any model test runs. If the HF_TOKEN environment variable is already set
(e.g. via `huggingface-cli login` or manual export), the AWS lookup is skipped.

Because this file lives in brainscore_language/models/, pytest automatically applies
its fixtures to every test in this directory and all model subdirectories (gemma/,
gpt/, etc.), so individual test files don't need any auth boilerplate.
"""

import json
import os

import boto3
import pytest
from huggingface_hub import login


@pytest.fixture(autouse=True, scope="session")
def set_hf_token():
    """Ensure a HuggingFace auth token is available before any model test runs.

    If HF_TOKEN is already set in the environment (e.g. via `huggingface-cli
    login` or a manual export), nothing is done -- huggingface_hub reads the
    variable itself. Otherwise the token is fetched from AWS Secrets Manager
    and `login()` is called. On any failure the session's tests are skipped
    rather than erroring later on gated-model downloads.
    """
    if os.environ.get("HF_TOKEN"):
        return
    # Keep the try narrow: only the AWS fetch should produce the
    # "unable to fetch from AWS" skip message.
    try:
        client = boto3.client("secretsmanager", region_name="us-east-2")
        secret = client.get_secret_value(SecretId="hugging_face_read_token")["SecretString"]
    except Exception as e:  # boto3 raises many exception classes; treat all as "unavailable"
        pytest.skip(f"HF_TOKEN not set and unable to fetch from AWS Secrets Manager: {e}")
    token = _extract_token(secret)
    if not token:
        pytest.skip("HF_TOKEN not set and the AWS secret contained no usable token")
    os.environ["HF_TOKEN"] = token
    try:
        login(token=token)
    except Exception as e:
        pytest.skip(f"HF_TOKEN fetched from AWS but HuggingFace login failed: {e}")


def _extract_token(secret: str) -> str:
    """Return the token contained in *secret*, stripped of whitespace.

    Handles both storage formats: a plain string ("hf_...") and a JSON object
    ({"key": "hf_..."}). For a JSON object the first value is used (secrets are
    expected to hold a single entry); an empty object yields "".
    """
    try:
        parsed = json.loads(secret)
    except json.JSONDecodeError:
        # Plain-string secrets ("hf_...") are not valid JSON -- use them as-is.
        return secret.strip()
    if isinstance(parsed, dict):
        return str(next(iter(parsed.values()), "")).strip()
    return str(parsed).strip()
12 changes: 12 additions & 0 deletions brainscore_language/models/gemma/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from brainscore_language import model_registry
from brainscore_language import ArtificialSubject
from brainscore_language.model_helpers.huggingface import HuggingfaceSubject

# Gemma 2B: 18 transformer layers, hidden size 2048.
# Layer 17 (last) chosen as default mapping pending benchmark-driven selection.
def _gemma_2b() -> HuggingfaceSubject:
    """Build the Gemma 2B subject with its default region-layer mapping."""
    return HuggingfaceSubject(
        model_id='google/gemma-2b',
        region_layer_mapping={
            ArtificialSubject.RecordingTarget.language_system: 'model.layers.17'
        },
    )


model_registry['gemma-2b'] = _gemma_2b
15 changes: 15 additions & 0 deletions brainscore_language/models/gemma/metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
models:
  # Key matches the registry identifier declared in gemma/__init__.py.
  gemma-2b:
    # NOTE(review): 'DCNN' looks wrong for a transformer LM -- confirm the
    # allowed architecture values in the metadata schema before changing.
    architecture: DCNN
    model_family: gemma
    # Placeholder counts (see extra_notes) -- replace with real values.
    total_parameter_count: 1234567
    trainable_parameter_count: 1234567
    # NOTE(review): __init__.py documents 18 transformer layers; 55 may count
    # sublayers or be a placeholder -- verify.
    total_layers: 55
    trainable_layers: 40
    model_size_mb: 1202
    training_dataset: null
    task_specialization: null
    brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/gemma
    # Derived from the model_id used in gemma/__init__.py.
    huggingface_link: https://huggingface.co/google/gemma-2b
    extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
    runnable: true
46 changes: 46 additions & 0 deletions brainscore_language/models/gemma/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import numpy as np
import pytest

from brainscore_language import load_model
from brainscore_language.artificial_subject import ArtificialSubject


@pytest.mark.memory_intense
def test_load_model():
    """Loading 'gemma-2b' from the registry succeeds and yields a subject."""
    assert load_model('gemma-2b') is not None


@pytest.mark.memory_intense
def test_identifier():
    """The registered subject reports the underlying HuggingFace model id."""
    subject = load_model('gemma-2b')
    expected_id = 'google/gemma-2b'
    assert subject.identifier() == expected_id


@pytest.mark.memory_intense
def test_neural():
    """Recorded representations have one row per stimulus and 2048 neuroids."""
    subject = load_model('gemma-2b')
    stimuli = ['the quick brown fox', 'jumps over', 'the lazy dog']
    subject.start_neural_recording(
        recording_target=ArtificialSubject.RecordingTarget.language_system,
        recording_type=ArtificialSubject.RecordingType.fMRI,
    )
    recordings = subject.digest_text(stimuli)['neural']
    assert len(recordings['presentation']) == len(stimuli)
    np.testing.assert_array_equal(recordings['stimulus'], stimuli)
    assert len(recordings['neuroid']) == 2048  # Gemma 2B hidden size


@pytest.mark.memory_intense
def test_next_word():
    """Next-word prediction returns one non-empty string per text part."""
    subject = load_model('gemma-2b')
    stimuli = ['the quick brown fox', 'jumps over', 'the lazy']
    subject.start_behavioral_task(task=ArtificialSubject.Task.next_word)
    predictions = subject.digest_text(stimuli)['behavior']
    assert len(predictions) == 3
    for predicted_word in predictions.values:
        assert isinstance(predicted_word, str)
        assert predicted_word.strip() != ''
Loading