Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions brainscore_language/benchmarks/pereira2018/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@

def Pereira2018_243sentences():
return _Pereira2018ExperimentLinear(experiment='243sentences', ceiling_s3_kwargs=dict(
version_id='CHl_9aFHIWVnPW_njePfy28yzggKuUPw',
sha1='5e23de899883828f9c886aec304bc5aa0f58f66c',
raw_kwargs=dict(
version_id='uZye03ENmn.vKB5mARUGhcIY_DjShtPD',
sha1='525a6ac8c14ad826c63fdd71faeefb8ba542d5ac',
raw_kwargs=dict(
version_id='XVTo58Po5YrNjTuDIWrmfHI0nbN2MVZa',
sha1='34ba453dc7e8a19aed18cc9bca160e97b4a80be5'
)
)
Expand All @@ -27,13 +24,10 @@ def Pereira2018_243sentences():

def Pereira2018_384sentences():
return _Pereira2018ExperimentLinear(experiment='384sentences', ceiling_s3_kwargs=dict(
version_id='sjlnXr5wXUoGv6exoWu06C4kYI0KpZLk',
sha1='fc895adc52fd79cea3040961d65d8f736a9d3e29',
raw_kwargs=dict(
version_id='Hi74r9UKfpK0h0Bjf5DL.JgflGoaknrA',
sha1='ce2044a7713426870a44131a99bfc63d8843dae0',
raw_kwargs=dict(
version_id='m4dq_ouKWZkYtdyNPMSP0p6rqb7wcYpi',
sha1='fe9fb24b34fd5602e18e34006ac5ccc7d4c825b8'
)
)
Expand Down Expand Up @@ -76,8 +70,8 @@ def _load_data(self, experiment: str) -> NeuroidAssembly:
data.attrs['identifier'] = f"{data.identifier}.{experiment}"
return data

def _load_ceiling(self, identifier: str, version_id: str, sha1: str, assembly_prefix="ceiling_", raw_kwargs=None):
ceiling = load_from_s3(identifier, cls=Score, assembly_prefix=assembly_prefix, version_id=version_id, sha1=sha1)
def _load_ceiling(self, identifier: str, sha1: str, version_id: str = None, assembly_prefix="ceiling_", raw_kwargs=None):
ceiling = load_from_s3(identifier, sha1=sha1, cls=Score, assembly_prefix=assembly_prefix, version_id=version_id)
if raw_kwargs: # recursively load raw attributes
raw = self._load_ceiling(identifier=identifier, assembly_prefix=assembly_prefix + "raw_", **raw_kwargs)
ceiling.attrs['raw'] = raw
Expand Down
1 change: 0 additions & 1 deletion brainscore_language/data/blank2014/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@

data_registry['Blank2014.fROI'] = lambda: load_from_s3(
identifier="Blank2014.fROI",
version_id="qM.uLV8ltOHM297r2SaGteYMX4Vy.oHB",
sha1="af1e868821b897cb1684e4c8dcd33977121ef552")
3 changes: 1 addition & 2 deletions brainscore_language/data/blank2014/data_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
def upload_blank2014():
assembly = load_blank2014()
upload_data_assembly(assembly,
assembly_identifier="Blank2014.fROI",
bucket_name="brainscore-language")
assembly_identifier="Blank2014.fROI")


# This file requires nltk_contrib to be installed to run. nltk_contrib is not part of requirements.txt because this
Expand Down
1 change: 0 additions & 1 deletion brainscore_language/data/fedorenko2016/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@

data_registry['Fedorenko2016.language'] = lambda: load_from_s3(
identifier="Fedorenko2016.language",
version_id="qvB7YZfEjbXEE64bODNLlQlZKWGpgPhy",
sha1="2966b6d78e972a72068aa6907377483f427e8d9a")
3 changes: 1 addition & 2 deletions brainscore_language/data/fedorenko2016/data_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
def upload_fedorenko2016():
assembly = load_fedorenko2016()
upload_data_assembly(assembly,
assembly_identifier="Fedorenko2016.language",
bucket_name="brainscore-language")
assembly_identifier="Fedorenko2016.language")


# adapted from
Expand Down
1 change: 0 additions & 1 deletion brainscore_language/data/futrell2018/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
def load_assembly():
assembly = load_from_s3(
identifier="Futrell2018",
version_id="MpR.gIXN8UrUnqwQyj.kCrh4VWrBvsGf",
sha1="381ccc8038fbdb31235b5f3e1d350f359b5e287f")
assembly.attrs['bibtex'] = BIBTEX
return assembly
Expand Down
2 changes: 0 additions & 2 deletions brainscore_language/data/pereira2018/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@

data_registry['Pereira2018.language'] = lambda: load_from_s3(
identifier="Pereira2018.language",
version_id="fq0gh.P7ThLu6DWUulho5W_F.YTEhDqJ",
sha1="f8434b4022f5b2c862f0ff2854d5b3f5f2a7fb96")
data_registry['Pereira2018.auditory'] = lambda: load_from_s3(
identifier="Pereira2018.auditory",
version_id=".lCMuSrGBlsEgLtZDOApLlr3h2szCmoC",
sha1="08e576bd3b8caf64850bb879abf07ae228ff1f5f")
1 change: 0 additions & 1 deletion brainscore_language/data/tuckute2024/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@

data_registry["Tuckute2024.language"] = lambda: load_from_s3(
identifier="Tuckute2024.language",
version_id="BB.DbwqLB4OhDR64duqojNdL0CRd4RmG",
sha1="5c8fc7f3e24cc1af5f5296459377b638b6492641")
6 changes: 2 additions & 4 deletions brainscore_language/data/tuckute2024/data_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
import numpy as np
import pandas as pd

from brainio.assemblies import NeuroidAssembly
from brainscore_core.supported_data_standards.brainio.assemblies import NeuroidAssembly
from pathlib import Path
from brainscore_language.utils.s3 import upload_data_assembly

def upload_tuckute2024():
assembly = load_tuckute2024_5subj(source=os.path.join(Path(__file__).parent, 'brain-lang-data_participant_20230728.csv'), roi="lang_LH_netw")
upload_data_assembly(assembly,
assembly_identifier=f"Tuckute2024.language",
bucket_name="brainscore-language"
)
assembly_identifier="Tuckute2024.language")

def groupby_coord(df: pd.DataFrame,
coord_col: str = 'item_id',
Expand Down
13 changes: 3 additions & 10 deletions brainscore_language/data/tuckute2024/test.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
import numpy as np

from brainscore_language import load_dataset
from brainscore_language.data.tuckute2024.data_packaging import load_tuckute2024_5subj


class TestData:
def test_language(self,
load_from_cache: bool = False):
if load_from_cache:
assembly = load_dataset('tuckute2024_5subj_lang_LH_netw')
else:
assembly = load_tuckute2024_5subj()
def test_language(self):
assembly = load_dataset('Tuckute2024.language')

assert assembly.dims == ('presentation', 'neuroid')
assert assembly.shape == (1000, 1)
Expand All @@ -19,6 +15,3 @@ def test_language(self,
assert np.unique(assembly.stimulus_id.values).shape[0] == 1000
assert assembly.neuroid_id.values == [1]
assert 1.28 < np.max(assembly.data) < 1.29



18 changes: 13 additions & 5 deletions brainscore_language/utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,15 @@

_logger = logging.getLogger(__name__)

# S3 bucket configuration, following the same pattern as brainscore_vision.
# The bucket path "brainscore-storage/brainscore-language" means:
# - actual S3 bucket: brainscore-storage
# - key prefix: brainscore-language/
_BUCKET = "brainscore-storage"
_FOLDER = "brainscore-language"

def upload_data_assembly(assembly, assembly_identifier, bucket_name="brainscore-language", assembly_prefix="assy_"):

def upload_data_assembly(assembly, assembly_identifier, bucket_name=_BUCKET, assembly_prefix="assy_"):
# adapted from
# https://github.com/mschrimpf/brainio/blob/8a40a3558d0b86072b9e221808f19005c7cb8c17/brainio/packaging.py#L217

Expand All @@ -19,21 +26,22 @@ def upload_data_assembly(assembly, assembly_identifier, bucket_name="brainscore-
assembly_store_identifier = assembly_prefix + assembly_identifier.replace(".", "_")
netcdf_file_name = assembly_store_identifier + ".nc"
target_netcdf_path = Path(fetch.get_local_data_path()) / assembly_store_identifier / netcdf_file_name
s3_key = netcdf_file_name
s3_key = f"{_FOLDER}/{netcdf_file_name}"

# write to disk and upload
netcdf_kf_sha1 = write_netcdf(assembly, target_netcdf_path)
response = upload_to_s3(target_netcdf_path, bucket_name, s3_key)
_logger.debug(f"Uploaded {assembly_store_identifier} to S3 "
f"with key={s3_key}, sha1={netcdf_kf_sha1}, version_id={response['VersionId']}: {response}")
f"with key={s3_key}, sha1={netcdf_kf_sha1}, version_id={response.get('VersionId')}: {response}")
response['sha1'] = netcdf_kf_sha1
return response


def load_from_s3(identifier, version_id, sha1, assembly_prefix="assy_", cls=NeuroidAssembly) -> DataAssembly:
def load_from_s3(identifier, sha1, version_id=None, assembly_prefix="assy_", cls=NeuroidAssembly) -> DataAssembly:
filename = f"{assembly_prefix}{identifier.replace('.', '_')}.nc"
remote_path = f"{_FOLDER}/{filename}"
file_path = fetch_file(location_type="S3",
location=f"https://brainscore-language.s3.amazonaws.com/{filename}",
location=f"https://{_BUCKET}.s3.amazonaws.com/{remote_path}",
version_id=version_id,
sha1=sha1)
loader = AssemblyLoader(cls=cls, file_path=file_path)
Expand Down