Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions brainscore_language/models/oasm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from brainscore_language import model_registry
from .model import OASMSubject

# Register 49 OASM variants: sigma=0 (pure identity, no smoothing) plus sigma=0.1..4.8 in
# steps of 0.1 (the paper's search grid, per the original author of this file).
# Each entry is a zero-argument lambda, so an OASMSubject is only constructed when the
# registry entry is called. Note the sigma=0 variant is keyed 'oasm-sigma0' (no decimal),
# while every other key carries one decimal place.
# NOTE(review): the entries are spelled out one per line instead of being generated in a
# loop -- presumably so every identifier appears literally in this file; confirm against
# the plugin-discovery mechanism before refactoring.
model_registry['oasm-sigma0'] = lambda: OASMSubject(identifier='oasm-sigma0', sigma=0.0)
model_registry['oasm-sigma0.1'] = lambda: OASMSubject(identifier='oasm-sigma0.1', sigma=0.1)
model_registry['oasm-sigma0.2'] = lambda: OASMSubject(identifier='oasm-sigma0.2', sigma=0.2)
model_registry['oasm-sigma0.3'] = lambda: OASMSubject(identifier='oasm-sigma0.3', sigma=0.3)
model_registry['oasm-sigma0.4'] = lambda: OASMSubject(identifier='oasm-sigma0.4', sigma=0.4)
model_registry['oasm-sigma0.5'] = lambda: OASMSubject(identifier='oasm-sigma0.5', sigma=0.5)
model_registry['oasm-sigma0.6'] = lambda: OASMSubject(identifier='oasm-sigma0.6', sigma=0.6)
model_registry['oasm-sigma0.7'] = lambda: OASMSubject(identifier='oasm-sigma0.7', sigma=0.7)
model_registry['oasm-sigma0.8'] = lambda: OASMSubject(identifier='oasm-sigma0.8', sigma=0.8)
model_registry['oasm-sigma0.9'] = lambda: OASMSubject(identifier='oasm-sigma0.9', sigma=0.9)
model_registry['oasm-sigma1.0'] = lambda: OASMSubject(identifier='oasm-sigma1.0', sigma=1.0)
model_registry['oasm-sigma1.1'] = lambda: OASMSubject(identifier='oasm-sigma1.1', sigma=1.1)
model_registry['oasm-sigma1.2'] = lambda: OASMSubject(identifier='oasm-sigma1.2', sigma=1.2)
model_registry['oasm-sigma1.3'] = lambda: OASMSubject(identifier='oasm-sigma1.3', sigma=1.3)
model_registry['oasm-sigma1.4'] = lambda: OASMSubject(identifier='oasm-sigma1.4', sigma=1.4)
model_registry['oasm-sigma1.5'] = lambda: OASMSubject(identifier='oasm-sigma1.5', sigma=1.5)
model_registry['oasm-sigma1.6'] = lambda: OASMSubject(identifier='oasm-sigma1.6', sigma=1.6)
model_registry['oasm-sigma1.7'] = lambda: OASMSubject(identifier='oasm-sigma1.7', sigma=1.7)
model_registry['oasm-sigma1.8'] = lambda: OASMSubject(identifier='oasm-sigma1.8', sigma=1.8)
model_registry['oasm-sigma1.9'] = lambda: OASMSubject(identifier='oasm-sigma1.9', sigma=1.9)
model_registry['oasm-sigma2.0'] = lambda: OASMSubject(identifier='oasm-sigma2.0', sigma=2.0)
model_registry['oasm-sigma2.1'] = lambda: OASMSubject(identifier='oasm-sigma2.1', sigma=2.1)
model_registry['oasm-sigma2.2'] = lambda: OASMSubject(identifier='oasm-sigma2.2', sigma=2.2)
model_registry['oasm-sigma2.3'] = lambda: OASMSubject(identifier='oasm-sigma2.3', sigma=2.3)
model_registry['oasm-sigma2.4'] = lambda: OASMSubject(identifier='oasm-sigma2.4', sigma=2.4)
model_registry['oasm-sigma2.5'] = lambda: OASMSubject(identifier='oasm-sigma2.5', sigma=2.5)
model_registry['oasm-sigma2.6'] = lambda: OASMSubject(identifier='oasm-sigma2.6', sigma=2.6)
model_registry['oasm-sigma2.7'] = lambda: OASMSubject(identifier='oasm-sigma2.7', sigma=2.7)
model_registry['oasm-sigma2.8'] = lambda: OASMSubject(identifier='oasm-sigma2.8', sigma=2.8)
model_registry['oasm-sigma2.9'] = lambda: OASMSubject(identifier='oasm-sigma2.9', sigma=2.9)
model_registry['oasm-sigma3.0'] = lambda: OASMSubject(identifier='oasm-sigma3.0', sigma=3.0)
model_registry['oasm-sigma3.1'] = lambda: OASMSubject(identifier='oasm-sigma3.1', sigma=3.1)
model_registry['oasm-sigma3.2'] = lambda: OASMSubject(identifier='oasm-sigma3.2', sigma=3.2)
model_registry['oasm-sigma3.3'] = lambda: OASMSubject(identifier='oasm-sigma3.3', sigma=3.3)
model_registry['oasm-sigma3.4'] = lambda: OASMSubject(identifier='oasm-sigma3.4', sigma=3.4)
model_registry['oasm-sigma3.5'] = lambda: OASMSubject(identifier='oasm-sigma3.5', sigma=3.5)
model_registry['oasm-sigma3.6'] = lambda: OASMSubject(identifier='oasm-sigma3.6', sigma=3.6)
model_registry['oasm-sigma3.7'] = lambda: OASMSubject(identifier='oasm-sigma3.7', sigma=3.7)
model_registry['oasm-sigma3.8'] = lambda: OASMSubject(identifier='oasm-sigma3.8', sigma=3.8)
model_registry['oasm-sigma3.9'] = lambda: OASMSubject(identifier='oasm-sigma3.9', sigma=3.9)
model_registry['oasm-sigma4.0'] = lambda: OASMSubject(identifier='oasm-sigma4.0', sigma=4.0)
model_registry['oasm-sigma4.1'] = lambda: OASMSubject(identifier='oasm-sigma4.1', sigma=4.1)
model_registry['oasm-sigma4.2'] = lambda: OASMSubject(identifier='oasm-sigma4.2', sigma=4.2)
model_registry['oasm-sigma4.3'] = lambda: OASMSubject(identifier='oasm-sigma4.3', sigma=4.3)
model_registry['oasm-sigma4.4'] = lambda: OASMSubject(identifier='oasm-sigma4.4', sigma=4.4)
model_registry['oasm-sigma4.5'] = lambda: OASMSubject(identifier='oasm-sigma4.5', sigma=4.5)
model_registry['oasm-sigma4.6'] = lambda: OASMSubject(identifier='oasm-sigma4.6', sigma=4.6)
model_registry['oasm-sigma4.7'] = lambda: OASMSubject(identifier='oasm-sigma4.7', sigma=4.7)
model_registry['oasm-sigma4.8'] = lambda: OASMSubject(identifier='oasm-sigma4.8', sigma=4.8)
15 changes: 15 additions & 0 deletions brainscore_language/models/oasm/metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# NOTE(review): per `extra_notes` below, these values are temporary hardcoded placeholders.
# They appear not to describe OASM itself (e.g. `architecture: DCNN` and the parameter,
# layer and size figures) -- confirm or replace before relying on this metadata.
models:
oasm:
architecture: DCNN
model_family: oasm
total_parameter_count: 1234567
trainable_parameter_count: 1234567
total_layers: 55
trainable_layers: 40
model_size_mb: 1202
training_dataset: null
task_specialization: null
brainscore_link: https://github.com/brain-score/language/tree/master/brainscore_language/models/oasm
huggingface_link: null
extra_notes: Temporary hardcoded metadata - will be replaced with actual generation
runnable: true
126 changes: 126 additions & 0 deletions brainscore_language/models/oasm/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
OASM: Orthogonal Autocorrelated Sequences Model.

A confound baseline model following Hadidi et al. (2025) "Illusions of Alignment Between
Large Language Models and Brains Emerge From Fragile Methods and Overlooked Confounds".

The model constructs an N x N identity matrix (N = total stimuli) and applies Gaussian
smoothing within each block (passage/sentence/story) along axis=1. This captures temporal
autocorrelation -- the fact that brain responses to nearby stimuli are more similar --
without encoding any linguistic content whatsoever.

Reference:
Hadidi et al. (2025). bioRxiv. https://doi.org/10.1101/2025.03.09.642245
Code: https://github.com/ebrahimfeghhi/beyond-brainscore
"""

import copy
import numpy as np
from scipy.ndimage import gaussian_filter1d
from typing import Union, List, Dict

from brainscore_core.supported_data_standards.brainio.assemblies import NeuroidAssembly, merge_data_arrays
from brainscore_language.artificial_subject import ArtificialSubject


class OASMSubject(ArtificialSubject):
    """
    Orthogonal Autocorrelated Sequences Model (OASM) for brain-score evaluation.

    For each block of K stimuli (passage, sentence, or story), constructs a K x K identity
    matrix and applies ``scipy.ndimage.gaussian_filter1d`` along axis=1 with the given sigma.
    Each block is placed at a unique offset in a fixed-size feature space, maintaining
    between-block orthogonality while introducing within-block temporal autocorrelation.

    This is mathematically equivalent to the paper's full N x N construction, adapted for
    brain-score's per-block ``digest_text`` calling convention.

    The instance is stateful: every ``digest_text`` call advances an internal column offset
    by the size of the digested block, and ``start_neural_recording`` resets that offset to 0.

    :param identifier: Unique model identifier (e.g., 'oasm-sigma1.0').
    :param sigma: Gaussian smoothing width. Must be >= 0. sigma=0 gives pure identity.
    :param max_features: Fixed dimensionality of the feature space. Must be >= total
        number of stimuli across all blocks in a benchmark run. Default 2000.
    :raises ValueError: if ``sigma`` is negative or NaN, or if ``max_features`` is < 1.
    """

    def __init__(self, identifier: str, sigma: float, max_features: int = 2000):
        # Enforce the documented contract up front. Without this check a negative (or NaN)
        # sigma fails the ``sigma > 0`` test in ``digest_text`` and silently yields
        # unsmoothed identity features. ``not sigma >= 0`` is also True for NaN.
        if not sigma >= 0:
            raise ValueError(f"sigma must be >= 0, got {sigma!r}")
        if max_features < 1:
            raise ValueError(f"max_features must be a positive integer, got {max_features!r}")

        self._identifier = identifier
        self._sigma = sigma
        self._max_features = max_features
        # (recording_target, recording_type) pairs registered via start_neural_recording
        self._neural_recordings: list = []
        # first free column of the feature space; advanced by each digested block
        self._offset: int = 0

    def identifier(self) -> str:
        """Return the identifier this subject was constructed with."""
        return self._identifier

    def start_behavioral_task(self, task: ArtificialSubject.Task):
        """
        Behavioral tasks are not supported by this model.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError("OASM encodes no linguistic content and cannot perform behavioral tasks")

    def start_neural_recording(self, recording_target: ArtificialSubject.RecordingTarget,
                               recording_type: ArtificialSubject.RecordingType):
        """
        Register a recording target/type pair and restart block placement at column 0.

        Recordings accumulate across calls; every registered pair is attached to the
        assemblies produced by subsequent ``digest_text`` calls.
        """
        self._neural_recordings.append((recording_target, recording_type))
        self._offset = 0

    def digest_text(self, text: Union[str, List[str]]) -> Dict[str, NeuroidAssembly]:
        """
        Produce OASM features for one block of stimuli.

        :param text: a single stimulus string, or a sequence (list or numpy array) of
            stimuli forming one block. A single string is treated as a block of size 1.
        :return: dict with key ``'neural'`` holding the merged assembly for the block
            (one presentation per stimulus, ``max_features`` neuroids, sorted by
            ``part_number``) and key ``'behavior'`` holding an empty list.
        :raises AssertionError: if ``start_neural_recording`` has not been called.
        :raises ValueError: if ``text`` is empty, or if the block would not fit into the
            remaining columns of the feature space.
        """
        # Explicit raise instead of ``assert`` so the check is not stripped under
        # ``python -O``; the exception type and message are unchanged.
        if not self._neural_recordings:
            raise AssertionError("Must call start_neural_recording before digest_text")

        if isinstance(text, str):
            text = [text]
        if isinstance(text, np.ndarray):
            text = list(text)

        block_size = len(text)
        # An empty block has nothing to package or merge; reject it explicitly.
        if block_size == 0:
            raise ValueError("digest_text requires at least one stimulus, got an empty sequence")

        if self._offset + block_size > self._max_features:
            raise ValueError(
                f"Cumulative stimulus count ({self._offset + block_size}) exceeds "
                f"max_features ({self._max_features}). Increase max_features."
            )

        # Build block: K x K identity, smoothed along axis=1
        block_features = np.eye(block_size, dtype=np.float64)
        if self._sigma > 0 and block_size > 1:
            block_features = gaussian_filter1d(block_features, sigma=self._sigma, axis=1)

        # Embed into D-dimensional space at the current offset; columns outside this
        # block's slice stay zero, which keeps different blocks orthogonal.
        features = np.zeros((block_size, self._max_features), dtype=np.float64)
        features[:, self._offset:self._offset + block_size] = block_features
        self._offset += block_size

        # Package each stimulus as a NeuroidAssembly
        output = {'behavior': [], 'neural': []}
        for part_number, stimulus in enumerate(text):
            stimuli_coords = {
                'stimulus': ('presentation', [stimulus]),
                'part_number': ('presentation', [part_number]),
            }
            neural_assembly = self._package_representations(features[part_number], stimuli_coords=stimuli_coords)
            output['neural'].append(neural_assembly)

        output['neural'] = merge_data_arrays(output['neural']).sortby('part_number')
        return output

    def _package_representations(self, representation_values: np.ndarray,
                                 stimuli_coords: dict) -> NeuroidAssembly:
        """
        Package a feature vector as a NeuroidAssembly matching the brain-score interface.

        :param representation_values: 1-D feature vector for a single stimulus.
        :param stimuli_coords: presentation-dimension coordinates for that stimulus.
        :return: the merge of one assembly per registered recording, each tagged with
            ``recording_target`` and ``recording_type`` neuroid coordinates.
        """
        layer_name = f'oasm_sigma{self._sigma}'
        num_units = len(representation_values)

        neuroid_coords = {
            'layer': ('neuroid', [layer_name] * num_units),
            'neuron_number_in_layer': ('neuroid', np.arange(num_units)),
            'neuroid_id': ('neuroid', [f'{layer_name}--{i}' for i in range(num_units)]),
        }

        # Wrap the vector in a list so the assembly has shape (1 presentation, num_units).
        layer_representations = NeuroidAssembly(
            [representation_values],
            coords={**stimuli_coords, **neuroid_coords},
            dims=['presentation', 'neuroid'])

        representations = []
        for recording_target, recording_type in self._neural_recordings:
            # Copy so each recording gets its own coordinates without touching the base.
            current_representations = copy.deepcopy(layer_representations)
            current_representations['recording_target'] = 'neuroid', [recording_target] * num_units
            current_representations['recording_type'] = 'neuroid', [recording_type] * num_units
            # Re-wrap in the same assembly type after adding the coordinates
            # (presumably to rebuild its indexes -- TODO confirm).
            current_representations = type(current_representations)(current_representations)
            representations.append(current_representations)
        representations = merge_data_arrays(representations)
        return representations
Loading
Loading