From c31a3838a87c4ec1be2475073336ceae8a48f60e Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Mon, 14 Apr 2025 22:16:46 +0200 Subject: [PATCH 01/11] Add annotations metadata --- mne/annotations.py | 143 ++++++++++++++++++++++++++++------ mne/tests/test_annotations.py | 29 ++++++- mne/tests/test_epochs.py | 36 ++++++--- 3 files changed, 174 insertions(+), 34 deletions(-) diff --git a/mne/annotations.py b/mne/annotations.py index 629ee7b20cb..59a423cbcbd 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -58,7 +58,7 @@ _datetime = datetime -def _check_o_d_s_c(onset, duration, description, ch_names): +def _check_o_d_s_c_m(onset, duration, description, ch_names, metadata): onset = np.atleast_1d(np.array(onset, dtype=float)) if onset.ndim != 1: raise ValueError( @@ -94,13 +94,36 @@ def _check_o_d_s_c(onset, duration, description, ch_names): _validate_type(name, str, f"ch_names[{ai}][{ci}]") ch_names = _ndarray_ch_names(ch_names) - if not (len(onset) == len(duration) == len(description) == len(ch_names)): + if metadata is not None: + pd = _check_pandas_installed(strict=True) + if not (hasattr(metadata, "iloc") and hasattr(metadata, "columns")): + raise ValueError( + f"Metadata must be a pandas DataFrame or None, got {type(metadata)}." + ) + for column in metadata.columns: + if column in ["onset", "duration", "description", "ch_names"]: + raise ValueError( + "Metadata cannot contain columns named 'onset', 'duration', " + f"'description', or 'ch_names'. Found columns {metadata.columns}." + ) + if not any( + np.issubdtype(metadata[column].dtype, t) + for t in [int, str, object, str] + ): + raise ValueError( + f"Metadata column '{column}' must have type int, float, or str, " + f"but got {metadata[column].dtype}." + ) + + if not (len(onset) == len(duration) == len(description) == len(ch_names)) or ( + metadata is not None and len(onset) != len(metadata) + ): raise ValueError( - "Onset, duration, description, and ch_names must be " + "Onset, duration, description, ch_names, and metadata must be " f"equal in sizes, got {len(onset)}, {len(duration)}, " f"{len(description)}, and {len(ch_names)}." ) - return onset, duration, description, ch_names + return onset, duration, description, ch_names, metadata def _ndarray_ch_names(ch_names): @@ -144,6 +167,8 @@ class Annotations: More precisely to this '%%Y-%%m-%%d %%H:%%M:%%S.%%f' particular case of the ISO8601 format where the delimiter between date and time is ' '. %(ch_names_annot)s + metadata: instance of pandas.DataFrame | None + Optional data frame containing metadata for each annotation. .. versionadded:: 0.23 @@ -274,10 +299,12 @@ class Annotations: :meth:`Raw.save() ` notes for details. """ # noqa: E501 - def __init__(self, onset, duration, description, orig_time=None, ch_names=None): + def __init__( + self, onset, duration, description, orig_time=None, ch_names=None, metadata=None + ): self._orig_time = _handle_meas_date(orig_time) - self.onset, self.duration, self.description, self.ch_names = _check_o_d_s_c( - onset, duration, description, ch_names + self.onset, self.duration, self.description, self.ch_names, self.metadata = ( + _check_o_d_s_c_m(onset, duration, description, ch_names, metadata) ) self._sort() # ensure we're sorted @@ -339,7 +366,11 @@ def __iadd__(self, other): f"{self.orig_time} != {other.orig_time})" ) return self.append( - other.onset, other.duration, other.description, other.ch_names + other.onset, + other.duration, + other.description, + other.ch_names, + other.metadata, ) def __iter__(self): @@ -350,7 +381,7 @@ def __iter__(self): for idx in range(len(self.onset)): yield self.__getitem__(idx, with_ch_names=with_ch_names) - def __getitem__(self, key, *, with_ch_names=None): + def __getitem__(self, key, *, with_ch_names=None, with_metadata=True): """Propagate indexing and slicing to the underlying numpy structure.""" if isinstance(key, int_like): out_keys = ("onset", "duration", "description", "orig_time") @@ -363,6 +394,9 @@ def __getitem__(self, key, *, with_ch_names=None): if with_ch_names or (with_ch_names is None and self._any_ch_names()): out_keys += ("ch_names",) out_vals += (self.ch_names[key],) + if with_metadata and self.metadata is not None: + out_keys += tuple(self.metadata.columns) + out_vals += tuple(self.metadata.iloc[key]) return OrderedDict(zip(out_keys, out_vals)) else: key = list(key) if isinstance(key, tuple) else key @@ -372,10 +406,11 @@ def __getitem__(self, key, *, with_ch_names=None): description=self.description[key], orig_time=self.orig_time, ch_names=self.ch_names[key], + metadata=self.metadata.iloc[key] if self.metadata is not None else None, ) @fill_doc - def append(self, onset, duration, description, ch_names=None): + def append(self, onset, duration, description, ch_names=None, metadata=None): """Add an annotated segment. Operates inplace. Parameters @@ -403,13 +438,20 @@ def append(self, onset, duration, description, ch_names=None): to not only ``list.append``, but also `list.extend `__. """ # noqa: E501 - onset, duration, description, ch_names = _check_o_d_s_c( - onset, duration, description, ch_names + onset, duration, description, ch_names, metadata = _check_o_d_s_c_m( + onset, duration, description, ch_names, metadata ) self.onset = np.append(self.onset, onset) self.duration = np.append(self.duration, duration) self.description = np.append(self.description, description) self.ch_names = np.append(self.ch_names, ch_names) + if (self.metadata is None) != (metadata is None): + raise ValueError( + "Either both or none of the appended metadata and the annotations metadata should be None" + ) + if metadata is not None: + pd = _check_pandas_installed(strict=True) + self.metadata = pd.concat([self.metadata, metadata], ignore_index=True) self._sort() return self @@ -436,6 +478,8 @@ def delete(self, idx): self.duration = np.delete(self.duration, idx) self.description = np.delete(self.description, idx) self.ch_names = np.delete(self.ch_names, idx) + if self.metadata is not None: + self.metadata = self.metadata.drop(index=self.metadata.iloc[idx].index) @fill_doc def to_data_frame(self, time_format="datetime"): @@ -466,6 +510,8 @@ def to_data_frame(self, time_format="datetime"): if self._any_ch_names(): df.update(ch_names=self.ch_names) df = pd.DataFrame(df) + if self.metadata is not None: + df = pd.concat([df, self.metadata], axis="columns", ignore_index=True) return df def count(self): @@ -567,6 +613,8 @@ def _sort(self): self.duration = self.duration[order] self.description = self.description[order] self.ch_names = self.ch_names[order] + if self.metadata is not None: + self.metadata = self.metadata.iloc[order] @verbose def crop( @@ -892,6 +940,20 @@ def get_annotations_per_epoch(self): this_annot["onset"] - this_tzero, this_annot["duration"], this_annot["description"], + OrderedDict( # metadata + [ + (k, v) + for k, v in this_annot.items() + if k + not in ( + "onset", + "duration", + "description", + "orig_time", + "ch_names", + ) + ] + ), ) # ...then add it to the correct sublist of `epoch_annot_list` epoch_annot_list[epo_ix].append(annot) @@ -941,13 +1003,12 @@ def add_annotations_to_metadata(self, overwrite=False): data = np.empty((len(self.events), 0)) metadata = pd.DataFrame(data=data) - if ( - any( - name in metadata.columns - for name in ["annot_onset", "annot_duration", "annot_description"] - ) - and not overwrite - ): + annot_columns = ["annot_onset", "annot_duration", "annot_description"] + annot_metadata_cols = [] + if self.annotations.metadata is not None: + annot_metadata_cols = self.annotations.metadata.columns.tolist() + annot_columns += annot_metadata_cols + if any(name in metadata.columns for name in annot_columns) and not overwrite: raise RuntimeError( "Metadata for Epochs already contains columns " '"annot_onset", "annot_duration", or "annot_description".' @@ -957,6 +1018,7 @@ def add_annotations_to_metadata(self, overwrite=False): # onsets, durations, and descriptions epoch_annot_list = self.get_annotations_per_epoch() onset, duration, description = [], [], [] + annot_metadata = {k: [] for k in annot_metadata_cols} for epoch_annot in epoch_annot_list: for ix, annot_prop in enumerate((onset, duration, description)): entry = [annot[ix] for annot in epoch_annot] @@ -967,11 +1029,17 @@ def add_annotations_to_metadata(self, overwrite=False): annot_prop.append(entry) + for col in annot_metadata_cols: + entry = [annot[3][col] for annot in epoch_annot] + annot_metadata[col].append(entry) + # Create a new Annotations column that is instantiated as an empty # list per Epoch. metadata["annot_onset"] = pd.Series(onset) metadata["annot_duration"] = pd.Series(duration) metadata["annot_description"] = pd.Series(description) + for col in annot_metadata_cols: + metadata[col] = pd.Series(annot_metadata[col]) # reset the metadata self.metadata = metadata @@ -984,6 +1052,20 @@ def _combine_annotations( """Combine a tuple of annotations.""" assert one is not None assert two is not None + if not (one.metadata is None) == (two.metadata is None): + raise ValueError( + "Cannot combine annotations with different metadata. " + "Either both must have metadata or neither." + ) + if one.metadata is not None: + if one.metadata.columns.tolist() != two.metadata.columns.tolist(): + raise ValueError( + "Cannot combine annotations with different metadata columns." + ) + pd = _check_pandas_installed(strict=True) + metadata = pd.concat([one.metadata, two.metadata], ignore_index=True) + else: + metadata = None shift = one_n_samples / sfreq # to the right by the number of samples shift += one_first_samp / sfreq # to the right by the offset shift -= two_first_samp / sfreq # undo its offset @@ -991,7 +1073,7 @@ def _combine_annotations( duration = np.concatenate([one.duration, two.duration]) description = np.concatenate([one.description, two.description]) ch_names = np.concatenate([one.ch_names, two.ch_names]) - return Annotations(onset, duration, description, one.orig_time, ch_names) + return Annotations(onset, duration, description, one.orig_time, ch_names, metadata) def _handle_meas_date(meas_date): @@ -1100,6 +1182,11 @@ def _write_annotations(fid, annotations): write_string( fid, FIFF.FIFF_MNE_EPOCHS_DROP_LOG, json.dumps(tuple(annotations.ch_names)) ) + if annotations.metadata is not None: + logger.warning( + "Writing annotations metadata to fif is not implemented yet. " + "The metadata will not be saved." + ) end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS) @@ -1128,11 +1215,16 @@ def _write_annotations_txt(fname, annot): for ci, ch in enumerate(annot.ch_names) ] ) + if annot.metadata is not None: + logger.warning( + "Writing annotations metadata to txt is not implemented yet. " + "The metadata will not be saved." + ) content += "\n" data = np.array(data, dtype=str).T assert data.ndim == 2 assert data.shape[0] == len(annot.onset) - assert data.shape[1] in (3, 4) + assert data.shape[1] >= 3 with open(fname, "wb") as fid: fid.write(content.encode()) np.savetxt(fid, data, delimiter=",", fmt="%s") @@ -1275,7 +1367,14 @@ def _read_annotations_csv(fname): _safe_name_list(val, "read", "annotation channel name") for val in df["ch_names"].values ] - return Annotations(onset, duration, description, orig_time, ch_names) + other_columns = df.columns.difference( + ["onset", "duration", "description", "ch_names"] + ) + if len(other_columns) > 0: + metadata = df[other_columns] + else: + metadata = None + return Annotations(onset, duration, description, orig_time, ch_names, metadata) def _read_brainstorm_annotations(fname, orig_time=None): diff --git a/mne/tests/test_annotations.py b/mne/tests/test_annotations.py index 4d0db170e2a..938996ec295 100644 --- a/mne/tests/test_annotations.py +++ b/mne/tests/test_annotations.py @@ -630,10 +630,29 @@ def test_annotation_epoching(): assert_equal([0, 2, 4], epochs.selection) -def test_annotation_concat(): +@pytest.mark.parametrize("with_metadata", [True, False]) +def test_annotation_concat(with_metadata): """Test if two Annotations objects can be concatenated.""" - a = Annotations([1, 2, 3], [5, 5, 8], ["a", "b", "c"], ch_names=[["1"], ["2"], []]) - b = Annotations([11, 12, 13], [1, 2, 2], ["x", "y", "z"], ch_names=[[], ["3"], []]) + if with_metadata: + pd = pytest.importorskip("pandas") + metadata = pd.DataFrame({"foo": [1, 2, 3], "bar": ["a", "b", "c"]}) + metadatb = pd.DataFrame({"foo": [4, 5, 6], "bar": ["d", "e", "f"]}) + else: + metadata, metadatb = None, None + a = Annotations( + [1, 2, 3], + [5, 5, 8], + ["a", "b", "c"], + ch_names=[["1"], ["2"], []], + metadata=metadata, + ) + b = Annotations( + [11, 12, 13], + [1, 2, 2], + ["x", "y", "z"], + ch_names=[[], ["3"], []], + metadata=metadatb, + ) # test + operator (does not modify a or b) c = a + b @@ -643,6 +662,10 @@ def test_annotation_concat(): assert_equal(len(a), 3) assert_equal(len(b), 3) assert_equal(len(c), 6) + if with_metadata: + pd.testing.assert_frame_equal( + c.metadata, pd.concat([metadata, metadatb], ignore_index=True) + ) # c should have updated channel names want_names = np.array([("1",), ("2",), (), (), ("3",), ()], dtype="O") diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py index 88f2d9cdc13..662958a0853 100644 --- a/mne/tests/test_epochs.py +++ b/mne/tests/test_epochs.py @@ -479,12 +479,12 @@ def test_average_movements(): def _assert_drop_log_types(drop_log): __tracebackhide__ = True assert isinstance(drop_log, tuple), "drop_log should be tuple" - assert all(isinstance(log, tuple) for log in drop_log), ( - "drop_log[ii] should be tuple" - ) - assert all(isinstance(s, str) for log in drop_log for s in log), ( - "drop_log[ii][jj] should be str" - ) + assert all( + isinstance(log, tuple) for log in drop_log + ), "drop_log[ii] should be tuple" + assert all( + isinstance(s, str) for log in drop_log for s in log + ), "drop_log[ii][jj] should be str" def test_reject(): @@ -4917,9 +4917,15 @@ def test_add_channels_picks(): @pytest.mark.parametrize("first_samp", [0, 10]) @pytest.mark.parametrize( - "meas_date, orig_date", [[None, None], [np.pi, None], [np.pi, timedelta(seconds=1)]] + "meas_date, orig_date, with_metadata", + [ + [None, None, False], + [np.pi, None, False], + [np.pi, timedelta(seconds=1), False], + [None, None, True], + ], ) -def test_epoch_annotations(first_samp, meas_date, orig_date, tmp_path): +def test_epoch_annotations(first_samp, meas_date, orig_date, with_metadata, tmp_path): """Test Epoch Annotations from RawArray with dates. Tests the following cases crossed with each other: @@ -4927,7 +4933,7 @@ def test_epoch_annotations(first_samp, meas_date, orig_date, tmp_path): - with and without meas_date - with and without an orig_time set in Annotations """ - pytest.importorskip("pandas") + pd = pytest.importorskip("pandas") from pandas.testing import assert_frame_equal data = np.random.randn(2, 400) * 10e-12 @@ -4947,6 +4953,11 @@ def test_epoch_annotations(first_samp, meas_date, orig_date, tmp_path): duration=[ant_dur, ant_dur, ant_dur], description=["x", "y", "z"], orig_time=orig_date, + metadata=( + pd.DataFrame({"foo": [1, 2, 3], "bar": list("abc")}) + if with_metadata + else None + ), ) raw.set_annotations(ants) epochs = make_fixed_length_epochs(raw, duration=1, overlap=0.5) @@ -4957,6 +4968,9 @@ def test_epoch_annotations(first_samp, meas_date, orig_date, tmp_path): assert "annot_onset" in metadata.columns assert "annot_duration" in metadata.columns assert "annot_description" in metadata.columns + if with_metadata: + assert "foo" in metadata.columns + assert "bar" in metadata.columns # Test that writing and reading back these new metadata works temp_fname = tmp_path / "test-epo.fif" @@ -4969,6 +4983,10 @@ def test_epoch_annotations(first_samp, meas_date, orig_date, tmp_path): assert_array_equal(raw.annotations.onset, epochs.annotations.onset) assert_array_equal(raw.annotations.duration, epochs.annotations.duration) assert_array_equal(raw.annotations.description, epochs.annotations.description) + if with_metadata: + assert_frame_equal( + raw.annotations.metadata, epochs.annotations.metadata + ) # compare Epoch annotations with expected values epoch_ants = epochs.get_annotations_per_epoch() From ca21f12a37f8476cdf4b97fec236648771382686 Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Mon, 14 Apr 2025 22:17:28 +0200 Subject: [PATCH 02/11] Fix lint --- mne/tests/test_epochs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py index 662958a0853..7ef4d75e095 100644 --- a/mne/tests/test_epochs.py +++ b/mne/tests/test_epochs.py @@ -4984,9 +4984,7 @@ def test_epoch_annotations(first_samp, meas_date, orig_date, with_metadata, tmp_ assert_array_equal(raw.annotations.duration, epochs.annotations.duration) assert_array_equal(raw.annotations.description, epochs.annotations.description) if with_metadata: - assert_frame_equal( - raw.annotations.metadata, epochs.annotations.metadata - ) + assert_frame_equal(raw.annotations.metadata, epochs.annotations.metadata) # compare Epoch annotations with expected values epoch_ants = epochs.get_annotations_per_epoch() From 57fb50838fc8bc480b1b0acaf50f5035473f7fdf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Apr 2025 20:28:56 +0000 Subject: [PATCH 03/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/tests/test_epochs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py index 7ef4d75e095..8699529ac4e 100644 --- a/mne/tests/test_epochs.py +++ b/mne/tests/test_epochs.py @@ -479,12 +479,12 @@ def test_average_movements(): def _assert_drop_log_types(drop_log): __tracebackhide__ = True assert isinstance(drop_log, tuple), "drop_log should be tuple" - assert all( - isinstance(log, tuple) for log in drop_log - ), "drop_log[ii] should be tuple" - assert all( - isinstance(s, str) for log in drop_log for s in log - ), "drop_log[ii][jj] should be str" + assert all(isinstance(log, tuple) for log in drop_log), ( + "drop_log[ii] should be tuple" + ) + assert all(isinstance(s, str) for log in drop_log for s in log), ( + "drop_log[ii][jj] should be str" + ) def test_reject(): From 187796c892931eff62f49452b33dcd3fd97e6553 Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Tue, 15 Apr 2025 12:12:03 +0200 Subject: [PATCH 04/11] Add read and write methods for fif and txt --- mne/_fiff/constants.py | 1 + mne/annotations.py | 97 +++++++++++++++++++++++++---------- mne/tests/test_annotations.py | 30 +++++++++-- 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/mne/_fiff/constants.py b/mne/_fiff/constants.py index cf604db530c..8642a703b48 100644 --- a/mne/_fiff/constants.py +++ b/mne/_fiff/constants.py @@ -1204,6 +1204,7 @@ # MNE Metadata Dataframes FIFF.FIFFB_MNE_METADATA = 3811 # metadata dataframes block +FIFF.FIFFB_MNE_ANNOTATIONS_METADATA = 3812 # metadata dataframes block # Table to match unrecognized channel location names to their known aliases CHANNEL_LOC_ALIASES = { diff --git a/mne/annotations.py b/mne/annotations.py index 59a423cbcbd..ad0823cd564 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -43,9 +43,11 @@ _mask_to_onsets_offsets, _on_missing, _pl, + _prepare_read_metadata, _stamp_to_dt, _validate_type, check_fname, + _prepare_write_metadata, fill_doc, int_like, logger, @@ -94,6 +96,13 @@ def _check_o_d_s_c_m(onset, duration, description, ch_names, metadata): _validate_type(name, str, f"ch_names[{ai}][{ci}]") ch_names = _ndarray_ch_names(ch_names) + if not (len(onset) == len(duration) == len(description) == len(ch_names)): + raise ValueError( + "Onset, duration, description and ch_names must be " + f"equal in sizes, got {len(onset)}, {len(duration)}, " + f"{len(description)}, and {len(ch_names)}." + ) + if metadata is not None: pd = _check_pandas_installed(strict=True) if not (hasattr(metadata, "iloc") and hasattr(metadata, "columns")): @@ -114,15 +123,12 @@ def _check_o_d_s_c_m(onset, duration, description, ch_names, metadata): f"Metadata column '{column}' must have type int, float, or str, " f"but got {metadata[column].dtype}." ) + if len(onset) != len(metadata): + raise ValueError( + "The length of metadata must match the number of annotations. " + f"Got {len(onset)} annotations and {len(metadata)} rows." + ) - if not (len(onset) == len(duration) == len(description) == len(ch_names)) or ( - metadata is not None and len(onset) != len(metadata) - ): - raise ValueError( - "Onset, duration, description, ch_names, and metadata must be " - f"equal in sizes, got {len(onset)}, {len(duration)}, " - f"{len(description)}, and {len(ch_names)}." - ) return onset, duration, description, ch_names, metadata @@ -511,7 +517,7 @@ def to_data_frame(self, time_format="datetime"): df.update(ch_names=self.ch_names) df = pd.DataFrame(df) if self.metadata is not None: - df = pd.concat([df, self.metadata], axis="columns", ignore_index=True) + df = pd.concat([df, self.metadata], axis="columns") return df def count(self): @@ -1182,13 +1188,14 @@ def _write_annotations(fid, annotations): write_string( fid, FIFF.FIFF_MNE_EPOCHS_DROP_LOG, json.dumps(tuple(annotations.ch_names)) ) - if annotations.metadata is not None: - logger.warning( - "Writing annotations metadata to fif is not implemented yet. " - "The metadata will not be saved." - ) end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS) + if annotations.metadata is not None: + start_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) + metadata = _prepare_write_metadata(annotations.metadata) + write_string(fid, FIFF.FIFF_DESCRIPTION, metadata) + end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) + def _write_annotations_csv(fname, annot): annot = annot.to_data_frame() @@ -1216,10 +1223,10 @@ def _write_annotations_txt(fname, annot): ] ) if annot.metadata is not None: - logger.warning( - "Writing annotations metadata to txt is not implemented yet. " - "The metadata will not be saved." - ) + for col in annot.metadata.columns: + content += f", {col}" + data.append(annot.metadata[col].values) + content += "\n" data = np.array(data, dtype=str).T assert data.ndim == 2 @@ -1427,28 +1434,49 @@ def _read_annotations_txt_parse_header(fname): def is_orig_time(x): return x.startswith("# orig_time :") + def is_columns(x): + return x.startswith("# onset, duration, description") + with open(fname) as fid: header = list(takewhile(lambda x: x.startswith("#"), fid)) orig_values = [h[13:].strip() for h in header if is_orig_time(h)] orig_values = [_handle_meas_date(orig) for orig in orig_values if _is_iso8601(orig)] - return None if not orig_values else orig_values[0] + columns = [[c.strip() for c in h[2:].split(",")] for h in header if is_columns(h)] + + return None if not orig_values else orig_values[0], ( + None if not columns else columns[0] + ) def _read_annotations_txt(fname): with warnings.catch_warnings(record=True): warnings.simplefilter("ignore") out = np.loadtxt(fname, delimiter=",", dtype=np.bytes_, unpack=True) + orig_time, columns = _read_annotations_txt_parse_header(fname) ch_names = None + metadata = None if len(out) == 0: onset, duration, desc = [], [], [] else: - _check_option("text header", len(out), (3, 4)) - if len(out) == 3: - onset, duration, desc = out + if columns is None: + _check_option("text header", len(out), (3, 4)) + columns = ["onset", "duration", "description"] + ( + ["ch_names"] if len(out) == 4 else [] + ) else: - onset, duration, desc, ch_names = out + _check_option( + "text header", columns[:3], (["onset", "duration", "description"],) + ) + _check_option("text header len", len(out), (len(columns),)) + onset, duration, desc = out[:3] + i = 3 + if len(columns) > i and columns[i] == "ch_names": + ch_names = out[i] + i += 1 + if len(columns) > i: + metadata = {columns[j]: out[j] for j in range(i, len(columns))} onset = [float(o.decode()) for o in np.atleast_1d(onset)] duration = [float(d.decode()) for d in np.atleast_1d(duration)] @@ -1458,8 +1486,11 @@ def _read_annotations_txt(fname): _safe_name_list(ch.decode().strip(), "read", f"ch_names[{ci}]") for ci, ch in enumerate(ch_names) ] - - orig_time = _read_annotations_txt_parse_header(fname) + if metadata is not None: + pd = _check_pandas_installed(strict=True) + metadata = pd.DataFrame( + {k: [d.decode() for d in np.atleast_1d(v)] for k, v in metadata.items()} + ) annotations = Annotations( onset=onset, @@ -1467,6 +1498,7 @@ def _read_annotations_txt(fname): description=desc, orig_time=orig_time, ch_names=ch_names, + metadata=metadata, ) return annotations @@ -1502,7 +1534,20 @@ def _read_annotations_fif(fid, tree): elif kind == FIFF.FIFF_MNE_EPOCHS_DROP_LOG: ch_names = tuple(tuple(x) for x in json.loads(tag.data)) assert len(onset) == len(duration) == len(description) - annotations = Annotations(onset, duration, description, orig_time, ch_names) + metadata = None + metadata_tree = dir_tree_find(tree, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) + if len(metadata_tree) > 0: + for dd in metadata_tree[0]["directory"]: + kind = dd.kind + pos = dd.pos + if kind == FIFF.FIFF_DESCRIPTION: + metadata = read_tag(fid, pos).data + metadata = _prepare_read_metadata(metadata) + break + annotations = Annotations( + onset, duration, description, orig_time, ch_names, metadata + ) + return annotations diff --git a/mne/tests/test_annotations.py b/mne/tests/test_annotations.py index 938996ec295..a298350fdfa 100644 --- a/mne/tests/test_annotations.py +++ b/mne/tests/test_annotations.py @@ -986,8 +986,8 @@ def _assert_annotations_equal(a, b, tol=0): _ORIG_TIME = datetime.fromtimestamp(1038942071.7201, timezone.utc) -@pytest.fixture(scope="function", params=("ch_names", "fmt")) -def dummy_annotation_file(tmp_path_factory, ch_names, fmt): +@pytest.fixture(scope="function", params=("ch_names", "fmt", "with_metadata")) +def dummy_annotation_file(tmp_path_factory, ch_names, fmt, with_metadata): """Create csv file for testing.""" if fmt == "csv": content = ( @@ -1005,7 +1005,9 @@ def dummy_annotation_file(tmp_path_factory, ch_names, fmt): ) else: assert fmt == "fif" - content = Annotations([0, 9], [1, 2.425], ["AA", "BB"], orig_time=_ORIG_TIME) + content = Annotations( + [0, 9], [1, 2.425], ["AA", "BB"], orig_time=_ORIG_TIME, metadata=None + ) if ch_names: if isinstance(content, Annotations): @@ -1017,6 +1019,17 @@ def dummy_annotation_file(tmp_path_factory, ch_names, fmt): content[-2] += "," content[-1] += ",MEG0111:MEG2563" content = "\n".join(content) + if with_metadata: + if isinstance(content, Annotations): + pd = pytest.importorskip("pandas") + content.metadata = pd.DataFrame({"foo": [1, 2], "bar": ["a", "b"]}) + else: + content = content.splitlines() + content[-3] += ",foo,bar" + content[-2] += ",1,a" + content[-1] += ",2,b" + content = "\n".join(content) + fname = tmp_path_factory.mktemp("data") / f"annotations-annot.{fmt}" if isinstance(content, str): @@ -1029,13 +1042,20 @@ def dummy_annotation_file(tmp_path_factory, ch_names, fmt): @pytest.mark.parametrize("ch_names", (False, True)) @pytest.mark.parametrize("fmt", [pytest.param("csv", marks=needs_pandas), "txt", "fif"]) -def test_io_annotation(dummy_annotation_file, tmp_path, fmt, ch_names): +@pytest.mark.parametrize( + "with_metadata", [pytest.param(True, marks=needs_pandas), False] +) +def test_io_annotation(dummy_annotation_file, tmp_path, fmt, ch_names, with_metadata): """Test CSV, TXT, and FIF input/output (which support ch_names).""" annot = read_annotations(dummy_annotation_file) assert annot.orig_time == _ORIG_TIME kwargs = dict(orig_time=_ORIG_TIME) if ch_names: kwargs["ch_names"] = ((), ("MEG0111", "MEG2563")) + if with_metadata: + pd = pytest.importorskip("pandas") + metadata = pd.DataFrame({"foo": [1, 2], "bar": ["a", "b"]}) + kwargs["metadata"] = metadata _assert_annotations_equal( annot, Annotations([0.0, 9.0], [1.0, 2.425], ["AA", "BB"], **kwargs), tol=1e-6 ) @@ -1146,7 +1166,7 @@ def test_read_annotation_txt_header(tmp_path): fname = tmp_path / "header.txt" with open(fname, "w") as f: f.write(content) - orig_time = _read_annotations_txt_parse_header(fname) + orig_time,_ = _read_annotations_txt_parse_header(fname) want = datetime.fromtimestamp(1038942071.7201, timezone.utc) assert orig_time == want From 9ca49dfe14802c67c8b158f508a10a2c589af285 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Apr 2025 10:12:24 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mne/annotations.py | 2 +- mne/tests/test_annotations.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mne/annotations.py b/mne/annotations.py index ad0823cd564..e28018204b1 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -44,10 +44,10 @@ _on_missing, _pl, _prepare_read_metadata, + _prepare_write_metadata, _stamp_to_dt, _validate_type, check_fname, - _prepare_write_metadata, fill_doc, int_like, logger, diff --git a/mne/tests/test_annotations.py b/mne/tests/test_annotations.py index a298350fdfa..df79c2d1975 100644 --- a/mne/tests/test_annotations.py +++ b/mne/tests/test_annotations.py @@ -1030,7 +1030,6 @@ def dummy_annotation_file(tmp_path_factory, ch_names, fmt, with_metadata): content[-1] += ",2,b" content = "\n".join(content) - fname = tmp_path_factory.mktemp("data") / f"annotations-annot.{fmt}" if isinstance(content, str): with open(fname, "w") as f: @@ -1166,7 +1165,7 @@ def test_read_annotation_txt_header(tmp_path): fname = tmp_path / "header.txt" with open(fname, "w") as f: f.write(content) - orig_time,_ = _read_annotations_txt_parse_header(fname) + orig_time, _ = _read_annotations_txt_parse_header(fname) want = datetime.fromtimestamp(1038942071.7201, timezone.utc) assert orig_time == want From f6a9e60e2ba31d9b45d8413f5a0e3e8f20c48f91 Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Tue, 15 Apr 2025 12:14:48 +0200 Subject: [PATCH 06/11] Fix pre-commit --- mne/annotations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mne/annotations.py b/mne/annotations.py index e28018204b1..241fc4b0f9d 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -104,7 +104,6 @@ def _check_o_d_s_c_m(onset, duration, description, ch_names, metadata): ) if metadata is not None: - pd = _check_pandas_installed(strict=True) if not (hasattr(metadata, "iloc") and hasattr(metadata, "columns")): raise ValueError( f"Metadata must be a pandas DataFrame or None, got {type(metadata)}." @@ -453,7 +452,8 @@ def append(self, onset, duration, description, ch_names=None, metadata=None): self.ch_names = np.append(self.ch_names, ch_names) if (self.metadata is None) != (metadata is None): raise ValueError( - "Either both or none of the appended metadata and the annotations metadata should be None" + "Either both or none of the appended metadata " + "and the annotations metadata should be None" ) if metadata is not None: pd = _check_pandas_installed(strict=True) From 79155d82e7c3c77eb4b84c8a20ec5bd5b178d4d8 Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Tue, 15 Apr 2025 12:26:29 +0200 Subject: [PATCH 07/11] Update changelog --- doc/changes/devel/13213.newfeature.rst | 1 + doc/changes/names.inc | 1 + 2 files changed, 2 insertions(+) create mode 100644 doc/changes/devel/13213.newfeature.rst diff --git a/doc/changes/devel/13213.newfeature.rst b/doc/changes/devel/13213.newfeature.rst new file mode 100644 index 00000000000..fb92f816954 --- /dev/null +++ b/doc/changes/devel/13213.newfeature.rst @@ -0,0 +1 @@ +Add a ``metadata`` attribute to :class:`mne.Annotations`, by `Pierre Guetschel`_. diff --git a/doc/changes/names.inc b/doc/changes/names.inc index 0d5ee6a5c73..21bf32b8704 100644 --- a/doc/changes/names.inc +++ b/doc/changes/names.inc @@ -232,6 +232,7 @@ .. _Phillip Alday: https://palday.bitbucket.io .. _Pierre Ablin: https://pierreablin.com .. _Pierre-Antoine Bannier: https://github.com/PABannier +.. _Pierre Guetschel: https://github.com/PierreGtch .. _Ping-Keng Jao: https://github.com/nafraw .. _Proloy Das: https://github.com/proloyd .. _Qian Chu: https://github.com/qian-chu From ea937cff496c671aded744000b5ab05cc250fbe7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Apr 2025 10:26:50 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/changes/names.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changes/names.inc b/doc/changes/names.inc index 21bf32b8704..80830d1ca98 100644 --- a/doc/changes/names.inc +++ b/doc/changes/names.inc @@ -231,8 +231,8 @@ .. _Peter Molfese: https://github.com/pmolfese .. _Phillip Alday: https://palday.bitbucket.io .. _Pierre Ablin: https://pierreablin.com -.. _Pierre-Antoine Bannier: https://github.com/PABannier .. _Pierre Guetschel: https://github.com/PierreGtch +.. _Pierre-Antoine Bannier: https://github.com/PABannier .. _Ping-Keng Jao: https://github.com/nafraw .. _Proloy Das: https://github.com/proloyd .. _Qian Chu: https://github.com/qian-chu From 7bc8c52da98a248875b4a9a7be9155e219de715a Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Tue, 15 Apr 2025 14:18:06 +0200 Subject: [PATCH 09/11] Fix docstring --- mne/annotations.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mne/annotations.py b/mne/annotations.py index 241fc4b0f9d..c3c8a76abb2 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -172,11 +172,13 @@ class Annotations: More precisely to this '%%Y-%%m-%%d %%H:%%M:%%S.%%f' particular case of the ISO8601 format where the delimiter between date and time is ' '. %(ch_names_annot)s - metadata: instance of pandas.DataFrame | None - Optional data frame containing metadata for each annotation. .. versionadded:: 0.23 + metadata : pandas.DataFrame | None + Optional data frame containing metadata for each annotation. + .. versionadded:: 1.10.0 + See Also -------- mne.annotations_from_events @@ -211,6 +213,12 @@ class Annotations: from the raw instance, any channel-specific annotation that has no channels left in the raw instance will also be removed. + **metadata** + + Metadata is a pandas DataFrame that can contain any number of columns. + The number of rows must match the number of annotations. The metadata + columns can be used to store any additional information about the annotations. + **orig_time** If ``orig_time`` is None, the annotations are synced to the start of the From 08def4de742a8a08e684914df198fee35924721d Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Tue, 15 Apr 2025 14:57:52 +0200 Subject: [PATCH 10/11] Fix FIFF codes --- mne/_fiff/constants.py | 1 - mne/annotations.py | 27 ++++++++++----------------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/mne/_fiff/constants.py b/mne/_fiff/constants.py index 8642a703b48..cf604db530c 100644 --- a/mne/_fiff/constants.py +++ b/mne/_fiff/constants.py @@ -1204,7 +1204,6 @@ # MNE Metadata Dataframes FIFF.FIFFB_MNE_METADATA = 3811 # metadata dataframes block -FIFF.FIFFB_MNE_ANNOTATIONS_METADATA = 3812 # metadata dataframes block # Table to match unrecognized channel location names to their known aliases CHANNEL_LOC_ALIASES = { diff --git a/mne/annotations.py b/mne/annotations.py index c3c8a76abb2..9fb2de0e643 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -178,7 +178,7 @@ class Annotations: Optional data frame containing metadata for each annotation. .. versionadded:: 1.10.0 - + See Also -------- mne.annotations_from_events @@ -1196,13 +1196,10 @@ def _write_annotations(fid, annotations): write_string( fid, FIFF.FIFF_MNE_EPOCHS_DROP_LOG, json.dumps(tuple(annotations.ch_names)) ) - end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS) - if annotations.metadata is not None: - start_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) metadata = _prepare_write_metadata(annotations.metadata) - write_string(fid, FIFF.FIFF_DESCRIPTION, metadata) - end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) + write_string(fid, FIFF.FIFFB_MNE_METADATA, metadata) + end_block(fid, FIFF.FIFFB_MNE_ANNOTATIONS) def _write_annotations_csv(fname, annot): @@ -1519,7 +1516,7 @@ def _read_annotations_fif(fid, tree): annotations = None else: annot_data = annot_data[0] - orig_time = ch_names = None + orig_time = ch_names = metadata = None onset, duration, description = list(), list(), list() for ent in annot_data["directory"]: kind = ent.kind @@ -1541,17 +1538,13 @@ def _read_annotations_fif(fid, tree): orig_time = tuple(orig_time) # new way elif kind == FIFF.FIFF_MNE_EPOCHS_DROP_LOG: ch_names = tuple(tuple(x) for x in json.loads(tag.data)) + elif kind == FIFF.FIFFB_MNE_METADATA: + metadata = _prepare_read_metadata(tag.data) + assert len(onset) == len(duration) == len(description) - metadata = None - metadata_tree = dir_tree_find(tree, FIFF.FIFFB_MNE_ANNOTATIONS_METADATA) - if len(metadata_tree) > 0: - for dd in metadata_tree[0]["directory"]: - kind = dd.kind - pos = dd.pos - if kind == FIFF.FIFF_DESCRIPTION: - metadata = read_tag(fid, pos).data - metadata = _prepare_read_metadata(metadata) - break + if metadata is not None: + assert len(metadata) == len(onset) + annotations = Annotations( onset, duration, description, orig_time, ch_names, metadata ) From 013d69b2fe89b3ec6b288bef5c16dda30cb02c26 Mon Sep 17 00:00:00 2001 From: Pierre Guetschel Date: Wed, 16 Apr 2025 22:07:19 +0200 Subject: [PATCH 11/11] Add missing parameter annotation in docstring --- mne/annotations.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mne/annotations.py b/mne/annotations.py index 9fb2de0e643..baaebfa2e57 100644 --- a/mne/annotations.py +++ b/mne/annotations.py @@ -439,6 +439,11 @@ def append(self, onset, duration, description, ch_names=None, metadata=None): %(ch_names_annot)s .. versionadded:: 0.23 + metadata : pandas.DataFrame | None + Optional data frame containing metadata for each annotation. + The number of rows must match the number of annotations. + + .. versionadded:: 1.10.0 Returns -------