Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Annotations time formats #13109

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions doc/changes/devel/13109.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix reading annotations with :func:`mne.read_annotations` from .csv files whose times have nanosecond precision, and make the times saved in .csv files by :meth:`mne.Annotations.save` and returned by :meth:`mne.Annotations.to_data_frame` ISO8601-compliant (at most microsecond precision), by `Thomas Binns`_.
25 changes: 23 additions & 2 deletions mne/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ class Annotations:
the annotations with raw data if their acquisition is started at the
same time. If it is a string, it should conform to the ISO8601 format.
More precisely to this '%%Y-%%m-%%d %%H:%%M:%%S.%%f' particular case of
the ISO8601 format where the delimiter between date and time is ' '.
the ISO8601 format where the delimiter between date and time is ' ' and the
precision is at most microseconds (nanoseconds are not supported).
%(ch_names_annot)s

.. versionadded:: 0.23
Expand Down Expand Up @@ -276,6 +277,20 @@ class Annotations:

def __init__(self, onset, duration, description, orig_time=None, ch_names=None):
self._orig_time = _handle_meas_date(orig_time)
if isinstance(orig_time, str) and self._orig_time is None:
try: # only warn if `orig_time` is not the default '1970-01-01 00:00:00'
if _handle_meas_date(0) == datetime.strptime(
orig_time, "%Y-%m-%d %H:%M:%S"
).replace(tzinfo=timezone.utc):
pass
except ValueError: # error if incorrect datetime format AND not the default
warn(
"The format of the `orig_time` string is not recognised. It "
"must conform to the ISO8601 format with at most microsecond "
"precision and where the delimiter between date and time is "
"' '.",
RuntimeWarning,
)
self.onset, self.duration, self.description, self.ch_names = _check_o_d_s_c(
onset, duration, description, ch_names
)
Expand Down Expand Up @@ -1264,7 +1279,13 @@ def _read_annotations_csv(fname):
"onsets in seconds."
)
except ValueError:
pass
# remove nanoseconds for ISO8601 (microsecond) compliance
timestamp = pd.Timestamp(orig_time)
timespec = "microseconds"
if timestamp == pd.Timestamp(_handle_meas_date(0)).astimezone(None):
timespec = "auto" # use default timespec for `orig_time=None`
orig_time = timestamp.isoformat(sep=" ", timespec=timespec)

onset_dt = pd.to_datetime(df["onset"])
onset = (onset_dt - onset_dt[0]).dt.total_seconds()
duration = df["duration"].values.astype(float)
Expand Down
40 changes: 40 additions & 0 deletions mne/tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def windows_like_datetime(monkeypatch):

def test_basics():
"""Test annotation class."""
pytest.importorskip("pandas")
import pandas as pd

raw = read_raw_fif(fif_fname)
assert raw.annotations is not None
assert len(raw.annotations.onset) == 0
Expand All @@ -95,6 +98,17 @@ def test_basics():
assert isinstance(annot.orig_time, datetime)
assert annot.orig_time.tzinfo is timezone.utc

# Test bad format `orig_time` str -> `None` raises warning
with pytest.warns(
RuntimeWarning, match="The format of the `orig_time` string is not recognised."
):
bad_orig_time = (
pd.Timestamp(_ORIG_TIME)
.astimezone(None)
.isoformat(sep=" ", timespec="nanoseconds")
)
Annotations(onset, duration, description, bad_orig_time)

pytest.raises(ValueError, Annotations, onset, duration, description[:9])
pytest.raises(ValueError, Annotations, [onset, 1], duration, description)
pytest.raises(ValueError, Annotations, onset, [duration, 1], description)
Expand Down Expand Up @@ -1041,6 +1055,24 @@ def test_broken_csv(tmp_path):
read_annotations(fname)


def test_nanosecond_csv(tmp_path):
    """Test that a .csv whose onsets have nanosecond precision is read correctly.

    The first onset of the file is used as ``orig_time``, which supports at most
    microsecond precision, so reading must not fail or lose the measurement date
    when the timestamp string carries a nanosecond component.
    """
    pytest.importorskip("pandas")
    import pandas as pd

    # Naive timestamp string with a nanosecond-resolution fractional part.
    onset = (
        pd.Timestamp(_ORIG_TIME)
        .astimezone(None)
        .isoformat(sep=" ", timespec="nanoseconds")
    )
    # The file itself is valid, so do not reuse the "broken" fixture name.
    fname = tmp_path / "annotations_nanoseconds.csv"
    fname.write_text(f"onset,duration,description\n{onset},1.0,AA", encoding="utf-8")

    annot = read_annotations(fname)
    assert annot.orig_time == _ORIG_TIME
    # The single annotation row must come through intact as well; onsets are
    # expressed relative to the first onset, hence 0 here.
    assert len(annot.onset) == 1
    assert annot.onset[0] == 0.0
    assert annot.duration[0] == 1.0
    assert annot.description[0] == "AA"


# Test for IO with .txt files


Expand Down Expand Up @@ -1462,6 +1494,8 @@ def test_repr():
def test_annotation_to_data_frame(time_format):
"""Test annotation class to data frame conversion."""
pytest.importorskip("pandas")
import pandas as pd

onset = np.arange(1, 10)
durations = np.full_like(onset, [4, 5, 6, 4, 5, 6, 4, 5, 6])
description = ["yy"] * onset.shape[0]
Expand All @@ -1481,6 +1515,12 @@ def test_annotation_to_data_frame(time_format):
assert want == got
assert df.groupby("description").count().onset["yy"] == 9

# Check nanoseconds omitted from onset times
if time_format == "datetime":
a.onset += 1e-7 # >6 decimals to trigger nanosecond component
df = a.to_data_frame(time_format=time_format)
assert pd.Timestamp(df.onset[0]).nanosecond == 0


def test_annotation_ch_names():
"""Test annotation ch_names updating and pruning."""
Expand Down
4 changes: 3 additions & 1 deletion mne/utils/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ def _convert_times(times, time_format, meas_date=None, first_time=0):
elif time_format == "timedelta":
times = to_timedelta(times, unit="s")
elif time_format == "datetime":
times = to_timedelta(times + first_time, unit="s") + meas_date
times = (to_timedelta(times + first_time, unit="s") + meas_date).astype(
"datetime64[us]"
) # make ISO8601 (microsecond) compatible
return times


Expand Down
Loading